/* $NetBSD: pmap.c,v 1.308 2023/12/30 23:07:42 thorpej Exp $ */

/*-
 * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
 *	The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Andrew Doran and Mindaugas Rasiukevicius,
 * and by Chris G. Demetriou.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
 */

/*
 * DEC Alpha physical map management code.
 *
 * History:
 *
 *	This pmap started life as a Motorola 68851/68030 pmap,
 *	written by Mike Hibler at the University of Utah.
 *
 *	It was modified for the DEC Alpha by Chris Demetriou
 *	at Carnegie Mellon University.
 *
 *	Support for non-contiguous physical memory was added by
 *	Jason R. Thorpe of the Numerical Aerospace Simulation
 *	Facility, NASA Ames Research Center and Chris Demetriou.
 *
 *	Page table management and a major cleanup were undertaken
 *	by Jason R. Thorpe, with lots of help from Ross Harvey of
 *	Avalon Computer Systems and from Chris Demetriou.
 *
 *	Support for the new UVM pmap interface was written by
 *	Jason R. Thorpe.
 *
 *	Support for ASNs was written by Jason R. Thorpe, again
 *	with help from Chris Demetriou and Ross Harvey.
 *
 *	The locking protocol was written by Jason R. Thorpe,
 *	using Chuck Cranor's i386 pmap for UVM as a model.
 *
 *	TLB shootdown code was written (and then subsequently
 *	rewritten some years later, borrowing some ideas from
 *	the x86 pmap) by Jason R. Thorpe.
 *
 *	Multiprocessor modifications by Andrew Doran and
 *	Jason R. Thorpe.
 *
 * Notes:
 *
 *	All user page table access is done via K0SEG.  Kernel
 *	page table access is done via the recursive Virtual Page
 *	Table because kernel PT pages are pre-allocated and never
 *	freed, so no VPT fault handling is required.
 */

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_lockdebug.h"
#include "opt_sysv.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */

__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.308 2023/12/30 23:07:42 thorpej Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/evcnt.h>
#include <sys/atomic.h>
#include <sys/cpu.h>

#include <uvm/uvm.h>

#if defined(MULTIPROCESSOR)
#include <machine/rpb.h>
#endif

#ifdef DEBUG
#define	PDB_FOLLOW	0x0001
#define	PDB_INIT	0x0002
#define	PDB_ENTER	0x0004
#define	PDB_REMOVE	0x0008
#define	PDB_CREATE	0x0010
#define	PDB_PTPAGE	0x0020
#define	PDB_ASN		0x0040
#define	PDB_BITS	0x0080
#define	PDB_COLLECT	0x0100
#define	PDB_PROTECT	0x0200
#define	PDB_BOOTSTRAP	0x1000
#define	PDB_PARANOIA	0x2000
#define	PDB_WIRING	0x4000
#define	PDB_PVDUMP	0x8000

int debugmap = 0;
int pmapdebug = PDB_PARANOIA;
#endif

#if defined(MULTIPROCESSOR)
#define	PMAP_MP(x)	x
#else
#define	PMAP_MP(x)	__nothing
#endif /* MULTIPROCESSOR */

/*
 * Given a map and a machine independent protection code,
 * convert to an alpha protection code.
 */
#define	pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
static int	protection_codes[2][8] __read_mostly;

/*
 * kernel_lev1map:
 *
 *	Kernel level 1 page table.  This maps all kernel level 2
 *	page table pages, and is used as a template for all user
 *	pmap level 1 page tables.  When a new user level 1 page
 *	table is allocated, all kernel_lev1map PTEs for kernel
 *	addresses are copied to the new map.
 *
 *	The kernel also has an initial set of kernel level 2 page
 *	table pages.  These map the kernel level 3 page table pages.
 *	As kernel level 3 page table pages are added, more level 2
 *	page table pages may be added to map them.  These pages are
 *	never freed.
 *
 *	Finally, the kernel also has an initial set of kernel level
 *	3 page table pages.  These map pages in K1SEG.  More level
 *	3 page table pages may be added at run-time if additional
 *	K1SEG address space is required.  These pages are never freed.
 *
 *	NOTE: When mappings are inserted into the kernel pmap, all
 *	level 2 and level 3 page table pages must already be allocated
 *	and mapped into the parent page table.
 */
pt_entry_t *kernel_lev1map __read_mostly;

/*
 * Virtual Page Table.
 */
static pt_entry_t *VPT __read_mostly;
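
/*
 * Illustrative sketch (not compiled): locating a kernel level 3 PTE
 * either by a full three-level walk rooted at kernel_lev1map, or by a
 * single index into the recursive Virtual Page Table, as described in
 * the "Notes" comment near the top of this file.  The helper below is
 * hypothetical; the real code uses the PMAP_KERNEL_PTE() macro defined
 * later in this file, and the VPT shortcut is simply:
 *
 *	&VPT[VPT_INDEX(va)]
 */
#if 0
static pt_entry_t *
example_kernel_l3pte(vaddr_t va)
{
	pt_entry_t *l1pte, *l2pte;

	/* Full walk: level 1 -> level 2 -> level 3. */
	l1pte = pmap_l1pte(kernel_lev1map, va);
	if (pmap_pte_v(l1pte) == 0)
		return NULL;
	l2pte = pmap_l2pte(kernel_lev1map, va, l1pte);
	if (pmap_pte_v(l2pte) == 0)
		return NULL;
	return pmap_l3pte(kernel_lev1map, va, l2pte);
}
#endif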

static struct {
	struct pmap k_pmap;
} kernel_pmap_store __cacheline_aligned;

struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap;

/* PA of first available physical page */
paddr_t		avail_start __read_mostly;

/* PA of last available physical page */
paddr_t		avail_end __read_mostly;

/* VA of last avail page (end of kernel AS) */
static vaddr_t	virtual_end __read_mostly;

/* Has pmap_init completed? */
static bool	pmap_initialized __read_mostly;

/* Instrumentation */
u_long		pmap_pages_stolen __read_mostly;

/*
 * This variable contains the number of CPU IDs we need to allocate
 * space for when allocating the pmap structure.  It is used to
 * size a per-CPU array of ASN and ASN Generation number.
 */
static u_long	pmap_ncpuids __read_mostly;

#ifndef PMAP_PV_LOWAT
#define	PMAP_PV_LOWAT	16
#endif
int		pmap_pv_lowat __read_mostly = PMAP_PV_LOWAT;

/*
 * List of all pmaps, used to update them when e.g. additional kernel
 * page tables are allocated.  This list is kept LRU-ordered by
 * pmap_activate().
 */
static TAILQ_HEAD(, pmap) pmap_all_pmaps __cacheline_aligned;

/*
 * Instrument the number of calls to pmap_growkernel().
 */
static struct evcnt pmap_growkernel_evcnt __read_mostly;

/*
 * The pools from which pmap structures and sub-structures are allocated.
 */
static struct pool_cache pmap_pmap_cache __read_mostly;
static struct pool_cache pmap_l1pt_cache __read_mostly;
static struct pool_cache pmap_pv_cache __read_mostly;

CTASSERT(offsetof(struct pmap, pm_percpu[0]) == COHERENCY_UNIT);
CTASSERT(PMAP_SIZEOF(ALPHA_MAXPROCS) < ALPHA_PGBYTES);
CTASSERT(sizeof(struct pmap_percpu) == COHERENCY_UNIT);

/*
 * Address Space Numbers.
 *
 * On many implementations of the Alpha architecture, the TLB entries and
 * I-cache blocks are tagged with a unique number within an implementation-
 * specified range.  When a process context becomes active, the ASN is used
 * to match TLB entries; if a TLB entry for a particular VA does not match
 * the current ASN, it is ignored (one could think of the processor as
 * having a collection of <max ASN> separate TLBs).  This allows operating
 * system software to skip the TLB flush that would otherwise be necessary
 * at context switch time.
 *
 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
 * causes TLB entries to match any ASN.  The PALcode also provides
 * a TBI (Translation Buffer Invalidate) operation that flushes all
 * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
 * mappings, so that invalidation of all user mappings does not invalidate
 * kernel mappings (which are consistent across all processes).
 *
 * pmap_next_asn always indicates the next ASN to use.  When
 * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
 *
 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
 * TLB entries and the I-cache are flushed, the generation number is bumped,
 * and pmap_next_asn is changed to indicate the first non-reserved ASN.
 *
 * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
 * ensures that there are no accidental accesses to user space for LWPs
 * using the kernel pmap.  This is important because the PALcode may use
 * the recursive VPT to service TLB misses.
 *
 * By reserving an ASN for the kernel, we are guaranteeing that an lwp
 * will not see any valid user space TLB entries until it passes through
 * pmap_activate() for the first time.
 *
 * On processors that do not support ASNs, the PALcode invalidates
 * non-ASM TLB entries automatically on swpctx.  We completely skip
 * the ASN machinery in this case because the PALcode neither reads
 * nor writes that field of the HWPCB.
 */

/* max ASN supported by the system */
static u_int	pmap_max_asn __read_mostly;
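
/*
 * Illustrative sketch (not compiled): a simplified outline of the
 * per-CPU ASN check performed when a user pmap is activated.  This is
 * only an approximation of what pmap_asn_alloc() does; the field names
 * match those used later in this file, but details such as I-stream
 * synchronization and CPUs without ASNs are omitted.
 */
#if 0
	struct pmap_percpu * const pmc = &pmap->pm_percpu[ci->ci_cpuid];

	if (pmc->pmc_asngen != ci->ci_asn_gen) {
		/* Stale generation: this pmap needs a fresh ASN here. */
		if (ci->ci_next_asn > pmap_max_asn) {
			/*
			 * ASNs exhausted: flush the non-ASM TLB entries
			 * and start a new generation on this CPU.
			 */
			ALPHA_TBIAP();
			ci->ci_asn_gen++;
			ci->ci_next_asn = PMAP_ASN_FIRST_USER;
		}
		pmc->pmc_asn = ci->ci_next_asn++;
		pmc->pmc_asngen = ci->ci_asn_gen;
	}
#endif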

/*
 * Locking:
 *
 *	READ/WRITE LOCKS
 *	----------------
 *
 *	* pmap_main_lock - This lock is used to prevent deadlock and/or
 *	  provide mutex access to the pmap module.  Most operations lock
 *	  the pmap first, then PV lists as needed.  However, some operations,
 *	  such as pmap_page_protect(), lock the PV lists before locking
 *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
 *	  pmap module if locking in the PV->pmap direction.  This is
 *	  implemented by acquiring a (shared) read lock on pmap_main_lock
 *	  if locking pmap->PV and an (exclusive) write lock if locking in
 *	  the PV->pmap direction.  Since only one thread can hold a write
 *	  lock at a time, this provides the mutex.
 *
 *	MUTEXES
 *	-------
 *
 *	* pmap lock (global hash) - These locks protect the pmap structures.
 *
 *	* pmap activation lock (global hash) - These IPL_SCHED spin locks
 *	  synchronize pmap_activate() and TLB shootdowns.  This has a lock
 *	  ordering constraint with the tlb_lock:
 *
 *		tlb_lock -> pmap activation lock
 *
 *	* pvh_lock (global hash) - These locks protect the PV lists for
 *	  managed pages.
 *
 *	* tlb_lock - This IPL_VM lock serializes local and remote TLB
 *	  invalidation.
 *
 *	* pmap_all_pmaps_lock - This lock protects the global list of
 *	  all pmaps.
 *
 *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
 *	  and the virtual_end variable.
 *
 *	  There is a lock ordering constraint for pmap_growkernel_lock.
 *	  pmap_growkernel() acquires the locks in the following order:
 *
 *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
 *		    pmap lock
 *
 *	  We need to ensure consistency between user pmaps and the
 *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
 *	  be held to prevent kernel_lev1map changing across pmaps
 *	  being added to / removed from the global pmaps list.
 *
 *	Address space number management (global ASN counters and per-pmap
 *	ASN state) is not locked; it uses arrays of values indexed
 *	per-processor.
 *
 *	All internal functions which operate on a pmap are called
 *	with the pmap already locked by the caller (which will be
 *	an interface function).
 */
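
/*
 * Illustrative sketch (not compiled) of the two locking directions
 * described above, using the lock macros defined just below.  The
 * surrounding code is hypothetical; only the macros are real.
 */
#if 0
	/* pmap -> PV direction (e.g. pmap_remove()): */
	PMAP_MAP_TO_HEAD_LOCK();		/* shared */
	PMAP_LOCK(pmap);
	/* ... may also take pvh locks here ... */
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();

	/* PV -> pmap direction (e.g. pmap_page_protect()): */
	PMAP_HEAD_TO_MAP_LOCK();		/* exclusive */
	/* ... take the pvh lock, then the pmaps found on the PV list ... */
	PMAP_HEAD_TO_MAP_UNLOCK();
#endif
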
static krwlock_t pmap_main_lock __cacheline_aligned;
static kmutex_t pmap_all_pmaps_lock __cacheline_aligned;
static krwlock_t pmap_growkernel_lock __cacheline_aligned;

#define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
#define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
#define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
#define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)

static union {
	kmutex_t	lock;
	uint8_t		pad[COHERENCY_UNIT];
} pmap_pvh_locks[64] __cacheline_aligned;

#define	PVH_LOCK_HASH(pg)						\
	((((uintptr_t)(pg)) >> 6) & 63)

static inline kmutex_t *
pmap_pvh_lock(struct vm_page *pg)
{
	return &pmap_pvh_locks[PVH_LOCK_HASH(pg)].lock;
}

static union {
	struct {
		kmutex_t	lock;
		kmutex_t	activation_lock;
	} locks;
	uint8_t		pad[COHERENCY_UNIT];
} pmap_pmap_locks[64] __cacheline_aligned;

#define	PMAP_LOCK_HASH(pm)						\
	((((uintptr_t)(pm)) >> 6) & 63)

static inline kmutex_t *
pmap_pmap_lock(pmap_t const pmap)
{
	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.lock;
}

static inline kmutex_t *
pmap_activation_lock(pmap_t const pmap)
{
	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.activation_lock;
}

#define	PMAP_LOCK(pmap)		mutex_enter(pmap_pmap_lock(pmap))
#define	PMAP_UNLOCK(pmap)	mutex_exit(pmap_pmap_lock(pmap))

#define	PMAP_ACT_LOCK(pmap)	mutex_spin_enter(pmap_activation_lock(pmap))
#define	PMAP_ACT_TRYLOCK(pmap)	mutex_tryenter(pmap_activation_lock(pmap))
#define	PMAP_ACT_UNLOCK(pmap)	mutex_spin_exit(pmap_activation_lock(pmap))

#if defined(MULTIPROCESSOR)
#define	pmap_all_cpus()		cpus_running
#else
#define	pmap_all_cpus()		~0UL
#endif /* MULTIPROCESSOR */

/*
 * TLB context structure; see description in "TLB management" section
 * below.
 */
#define	TLB_CTX_MAXVA		8
#define	TLB_CTX_ALLVA		PAGE_MASK
struct pmap_tlb_context {
	uintptr_t		t_addrdata[TLB_CTX_MAXVA];
	pmap_t			t_pmap;
	struct pmap_pagelist	t_freeptq;
	struct pmap_pvlist	t_freepvq;
};

/*
 * Internal routines
 */
static void	alpha_protection_init(void);
static pt_entry_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool,
				      pv_entry_t *,
				      struct pmap_tlb_context *);
static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t,
			       struct pmap_tlb_context *);

/*
 * PT page management functions.
 */
static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
static void	pmap_ptpage_free(pmap_t, pt_entry_t *,
				 struct pmap_tlb_context *);
static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *,
				 struct pmap_tlb_context *);
static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *,
				 struct pmap_tlb_context *);
static void	pmap_l1pt_delref(pmap_t, pt_entry_t *);

static void	*pmap_l1pt_alloc(struct pool *, int);
static void	pmap_l1pt_free(struct pool *, void *);

static struct pool_allocator pmap_l1pt_allocator = {
	pmap_l1pt_alloc, pmap_l1pt_free, 0,
};

static int	pmap_l1pt_ctor(void *, void *, int);

/*
 * PV table management functions.
 */
static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
			      bool, pv_entry_t);
static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool,
			       pv_entry_t *, struct pmap_tlb_context *);
static void	*pmap_pv_page_alloc(struct pool *, int);
static void	pmap_pv_page_free(struct pool *, void *);

static struct pool_allocator pmap_pv_page_allocator = {
	pmap_pv_page_alloc, pmap_pv_page_free, 0,
};

#ifdef DEBUG
void	pmap_pv_dump(paddr_t);
#endif

#define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
#define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))

/*
 * Generic routine for freeing pages on a pmap_pagelist back to
 * the system.
 */
static void
pmap_pagelist_free(struct pmap_pagelist * const list)
{
	struct vm_page *pg;

	while ((pg = LIST_FIRST(list)) != NULL) {
		LIST_REMOVE(pg, pageq.list);
		/* Fix up ref count; it's not always 0 when we get here. */
		PHYSPAGE_REFCNT_SET(pg, 0);
		uvm_pagefree(pg);
	}
}

/*
 * Generic routine for freeing a list of PV entries back to the
 * system.
 */
static void
pmap_pvlist_free(struct pmap_pvlist * const list)
{
	pv_entry_t pv;

	while ((pv = LIST_FIRST(list)) != NULL) {
		LIST_REMOVE(pv, pv_link);
		pmap_pv_free(pv);
	}
}

/*
 * TLB management.
 *
 * TLB invalidations need to be performed on local and remote CPUs
 * whenever parts of the PTE that the hardware or PALcode understands
 * change.  In order to amortize the cost of these operations, we will
 * queue up to 8 addresses to invalidate in a batch.  Any more than
 * that, and we will hit the entire TLB.
 *
 * Some things that add complexity:
 *
 * ==> ASNs. A CPU may have valid TLB entries for other than the current
 *     address space.  We can only invalidate TLB entries for the current
 *     address space, so when asked to invalidate a VA for the non-current
 *     pmap on a given CPU, we simply invalidate the ASN for that pmap,CPU
 *     tuple so that a new one is allocated on the next activation on that
 *     CPU.  N.B. that for CPUs that don't implement ASNs, SWPCTX does all
 *     the work necessary, so we can skip some work in the pmap module
 *     itself.
 *
 *     When a pmap is activated on a given CPU, we set a corresponding
 *     bit in pmap::pm_cpus, indicating that it potentially has valid
 *     TLB entries for that address space.  This bitmap is then used to
 *     determine which remote CPUs need to be notified of invalidations.
 *     The bit is cleared when the ASN is invalidated on that CPU.
 *
 *     In order to serialize with activating an address space on a
 *     given CPU (that we can reliably send notifications only to
 *     relevant remote CPUs), we acquire the pmap lock in pmap_activate()
 *     and also hold the lock while remote shootdowns take place.
 *     This does not apply to the kernel pmap; all CPUs are notified about
 *     invalidations for the kernel pmap, and the pmap lock is not held
 *     in pmap_activate() for the kernel pmap.
 *
 * ==> P->V operations (e.g. pmap_page_protect()) may require sending
 *     invalidations for multiple address spaces.  We only track one
 *     address space at a time, and if we encounter more than one, then
 *     the notification each CPU gets is to hit the entire TLB.  Note
 *     also that we can't serialize with pmap_activate() in this case,
 *     so all CPUs will get the notification, and they check when
 *     processing the notification if the pmap is current on that CPU.
 *
 * Invalidation information is gathered into a pmap_tlb_context structure
 * that includes room for 8 VAs, the pmap the VAs belong to, a bitmap of
 * CPUs to be notified, and a list for PT pages that are freed during
 * removal of mappings.  The number of valid addresses in the list as
 * well as flags are squeezed into the lower bits of the first two VAs.
 * Storage for this structure is allocated on the stack.  We need to be
 * careful to keep the size of this structure under control.
 *
 * When notifying remote CPUs, we acquire the tlb_lock (which also
 * blocks IPIs), record the pointer to our context structure, set a
 * global bitmap of CPUs to be notified, and then send the IPIs to
 * each victim.  While the other CPUs are in-flight, we then perform
 * any invalidations necessary on the local CPU.  Once that is done,
 * we then wait for the global context pointer to be cleared, which
 * will be done by the final remote CPU to complete its work.  This
 * method reduces cache line contention during processing.
 *
 * When removing mappings in user pmaps, this implementation frees page
 * table pages back to the VM system once they contain no valid mappings.
 * As we do this, we must ensure that we invalidate TLB entries that the
 * CPU might hold for the respective recursive VPT mappings.  This must
 * be done whenever an L1 or L2 PTE is invalidated.  Until these VPT
 * translations are invalidated, the PT pages must not be reused.  For
 * this reason, we keep a list of freed PT pages in the context structure
 * and drain them off once all invalidations are complete.
 *
 * NOTE: The value of TLB_CTX_MAXVA is tuned to accommodate the UBC
 * window size (defined as 64KB on alpha in <machine/vmparam.h>).
 */
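
/*
 * Illustrative sketch (not compiled): the typical life cycle of a
 * pmap_tlb_context, as used by the remove/protect paths later in this
 * file.  The loop shown here is schematic only.
 */
#if 0
	struct pmap_tlb_context tlbctx;

	pmap_tlb_context_init(&tlbctx, 0);
	/* ... with the pmap locked, for each PTE that is changed: ... */
	pmap_tlb_shootdown(pmap, va, pte_bits, &tlbctx);
	/* ... after dropping the locks: ... */
	pmap_tlb_shootnow(&tlbctx);
	pmap_tlb_context_drain(&tlbctx);	/* free queued PT pages/PVs */
#endif
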
#define	TLB_CTX_F_ASM		__BIT(0)
#define	TLB_CTX_F_IMB		__BIT(1)
#define	TLB_CTX_F_KIMB		__BIT(2)
#define	TLB_CTX_F_PV		__BIT(3)
#define	TLB_CTX_F_MULTI		__BIT(4)

#define	TLB_CTX_COUNT(ctx)	((ctx)->t_addrdata[0] & PAGE_MASK)
#define	TLB_CTX_INC_COUNT(ctx)	 (ctx)->t_addrdata[0]++
#define	TLB_CTX_SET_ALLVA(ctx)	 (ctx)->t_addrdata[0] |= TLB_CTX_ALLVA

#define	TLB_CTX_FLAGS(ctx)	((ctx)->t_addrdata[1] & PAGE_MASK)
#define	TLB_CTX_SET_FLAG(ctx, f) (ctx)->t_addrdata[1] |= (f)

#define	TLB_CTX_VA(ctx, i)	((ctx)->t_addrdata[(i)] & ~PAGE_MASK)
#define	TLB_CTX_SETVA(ctx, i, va)					\
	(ctx)->t_addrdata[(i)] = (va) | ((ctx)->t_addrdata[(i)] & PAGE_MASK)

static struct {
	kmutex_t	lock;
	struct evcnt	events;
} tlb_shootdown __cacheline_aligned;
#define	tlb_lock	tlb_shootdown.lock
#define	tlb_evcnt	tlb_shootdown.events
#if defined(MULTIPROCESSOR)
static const struct pmap_tlb_context *tlb_context __cacheline_aligned;
static unsigned long tlb_pending __cacheline_aligned;
#endif /* MULTIPROCESSOR */

#if defined(TLB_STATS)
#define	TLB_COUNT_DECL(cnt)	static struct evcnt tlb_stat_##cnt
#define	TLB_COUNT(cnt)		atomic_inc_64(&tlb_stat_##cnt .ev_count)
#define	TLB_COUNT_ATTACH(cnt)						\
	evcnt_attach_dynamic_nozero(&tlb_stat_##cnt, EVCNT_TYPE_MISC,	\
	    NULL, "TLB", #cnt)

TLB_COUNT_DECL(invalidate_multi_tbia);
TLB_COUNT_DECL(invalidate_multi_tbiap);
TLB_COUNT_DECL(invalidate_multi_imb);

TLB_COUNT_DECL(invalidate_kern_tbia);
TLB_COUNT_DECL(invalidate_kern_tbis);
TLB_COUNT_DECL(invalidate_kern_imb);

TLB_COUNT_DECL(invalidate_user_not_current);
TLB_COUNT_DECL(invalidate_user_lazy_imb);
TLB_COUNT_DECL(invalidate_user_tbiap);
TLB_COUNT_DECL(invalidate_user_tbis);

TLB_COUNT_DECL(shootdown_kernel);
TLB_COUNT_DECL(shootdown_user);
TLB_COUNT_DECL(shootdown_imb);
TLB_COUNT_DECL(shootdown_kimb);
TLB_COUNT_DECL(shootdown_overflow);

TLB_COUNT_DECL(shootdown_all_user);
TLB_COUNT_DECL(shootdown_all_user_imb);

TLB_COUNT_DECL(shootdown_pv);
TLB_COUNT_DECL(shootdown_pv_multi);

TLB_COUNT_DECL(shootnow_over_notify);
TLB_COUNT_DECL(shootnow_remote);

TLB_COUNT_DECL(reason_remove_kernel);
TLB_COUNT_DECL(reason_remove_user);
TLB_COUNT_DECL(reason_remove_all_user);
TLB_COUNT_DECL(reason_page_protect_read);
TLB_COUNT_DECL(reason_page_protect_none);
TLB_COUNT_DECL(reason_protect);
TLB_COUNT_DECL(reason_enter_kernel);
TLB_COUNT_DECL(reason_enter_user);
TLB_COUNT_DECL(reason_kenter);
TLB_COUNT_DECL(reason_enter_l2pt_delref);
TLB_COUNT_DECL(reason_enter_l3pt_delref);
TLB_COUNT_DECL(reason_kremove);
TLB_COUNT_DECL(reason_clear_modify);
TLB_COUNT_DECL(reason_clear_reference);
TLB_COUNT_DECL(reason_emulate_reference);

TLB_COUNT_DECL(asn_reuse);
TLB_COUNT_DECL(asn_newgen);
TLB_COUNT_DECL(asn_assign);

TLB_COUNT_DECL(activate_both_change);
TLB_COUNT_DECL(activate_asn_change);
TLB_COUNT_DECL(activate_ptbr_change);
TLB_COUNT_DECL(activate_swpctx);
TLB_COUNT_DECL(activate_skip_swpctx);

#else /* ! TLB_STATS */
#define	TLB_COUNT(cnt)		__nothing
#define	TLB_COUNT_ATTACH(cnt)	__nothing
#endif /* TLB_STATS */

static void
pmap_tlb_init(void)
{
	/* mutex is initialized in pmap_bootstrap(). */

	evcnt_attach_dynamic_nozero(&tlb_evcnt, EVCNT_TYPE_MISC,
	    NULL, "TLB", "shootdown");

	TLB_COUNT_ATTACH(invalidate_multi_tbia);
	TLB_COUNT_ATTACH(invalidate_multi_tbiap);
	TLB_COUNT_ATTACH(invalidate_multi_imb);

	TLB_COUNT_ATTACH(invalidate_kern_tbia);
	TLB_COUNT_ATTACH(invalidate_kern_tbis);
	TLB_COUNT_ATTACH(invalidate_kern_imb);

	TLB_COUNT_ATTACH(invalidate_user_not_current);
	TLB_COUNT_ATTACH(invalidate_user_lazy_imb);
	TLB_COUNT_ATTACH(invalidate_user_tbiap);
	TLB_COUNT_ATTACH(invalidate_user_tbis);

	TLB_COUNT_ATTACH(shootdown_kernel);
	TLB_COUNT_ATTACH(shootdown_user);
	TLB_COUNT_ATTACH(shootdown_imb);
	TLB_COUNT_ATTACH(shootdown_kimb);
	TLB_COUNT_ATTACH(shootdown_overflow);

	TLB_COUNT_ATTACH(shootdown_all_user);
	TLB_COUNT_ATTACH(shootdown_all_user_imb);

	TLB_COUNT_ATTACH(shootdown_pv);
	TLB_COUNT_ATTACH(shootdown_pv_multi);

	TLB_COUNT_ATTACH(shootnow_over_notify);
	TLB_COUNT_ATTACH(shootnow_remote);

	TLB_COUNT_ATTACH(reason_remove_kernel);
	TLB_COUNT_ATTACH(reason_remove_user);
	TLB_COUNT_ATTACH(reason_remove_all_user);
	TLB_COUNT_ATTACH(reason_page_protect_read);
	TLB_COUNT_ATTACH(reason_page_protect_none);
	TLB_COUNT_ATTACH(reason_protect);
	TLB_COUNT_ATTACH(reason_enter_kernel);
	TLB_COUNT_ATTACH(reason_enter_user);
	TLB_COUNT_ATTACH(reason_kenter);
	TLB_COUNT_ATTACH(reason_enter_l2pt_delref);
	TLB_COUNT_ATTACH(reason_enter_l3pt_delref);
	TLB_COUNT_ATTACH(reason_kremove);
	TLB_COUNT_ATTACH(reason_clear_modify);
	TLB_COUNT_ATTACH(reason_clear_reference);

	TLB_COUNT_ATTACH(asn_reuse);
	TLB_COUNT_ATTACH(asn_newgen);
	TLB_COUNT_ATTACH(asn_assign);

	TLB_COUNT_ATTACH(activate_both_change);
	TLB_COUNT_ATTACH(activate_asn_change);
	TLB_COUNT_ATTACH(activate_ptbr_change);
	TLB_COUNT_ATTACH(activate_swpctx);
	TLB_COUNT_ATTACH(activate_skip_swpctx);
}

static inline void
pmap_tlb_context_init(struct pmap_tlb_context * const tlbctx, uintptr_t flags)
{
	/* Initialize the minimum number of fields. */
	tlbctx->t_addrdata[0] = 0;
	tlbctx->t_addrdata[1] = flags;
	tlbctx->t_pmap = NULL;
	LIST_INIT(&tlbctx->t_freeptq);
	LIST_INIT(&tlbctx->t_freepvq);
}

static void
pmap_tlb_shootdown_internal(pmap_t const pmap, vaddr_t const va,
    pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
{
	KASSERT(pmap != NULL);
	KASSERT((va & PAGE_MASK) == 0);

	/*
	 * Figure out who needs to hear about this, and the scope
	 * of an all-entries invalidate.
	 */
	if (pmap == pmap_kernel()) {
		TLB_COUNT(shootdown_kernel);
		KASSERT(pte_bits & PG_ASM);
		TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_ASM);

		/* Note if an I-stream sync is also needed. */
		if (pte_bits & PG_EXEC) {
			TLB_COUNT(shootdown_kimb);
			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_KIMB);
		}
	} else {
		TLB_COUNT(shootdown_user);
		KASSERT((pte_bits & PG_ASM) == 0);

		/* Note if an I-stream sync is also needed. */
		if (pte_bits & PG_EXEC) {
			TLB_COUNT(shootdown_imb);
			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB);
		}
	}

	KASSERT(tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap);
	tlbctx->t_pmap = pmap;

	/*
	 * If we're already at the max, just tell each active CPU
	 * to nail everything.
	 */
	const uintptr_t count = TLB_CTX_COUNT(tlbctx);
	if (count > TLB_CTX_MAXVA) {
		return;
	}
	if (count == TLB_CTX_MAXVA) {
		TLB_COUNT(shootdown_overflow);
		TLB_CTX_SET_ALLVA(tlbctx);
		return;
	}

	TLB_CTX_SETVA(tlbctx, count, va);
	TLB_CTX_INC_COUNT(tlbctx);
}

static void
pmap_tlb_shootdown(pmap_t const pmap, vaddr_t const va,
    pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
{
	KASSERT((TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV) == 0);
	pmap_tlb_shootdown_internal(pmap, va, pte_bits, tlbctx);
}

static void
pmap_tlb_shootdown_all_user(pmap_t const pmap, pt_entry_t const pte_bits,
    struct pmap_tlb_context * const tlbctx)
{
	KASSERT(pmap != pmap_kernel());

	TLB_COUNT(shootdown_all_user);

	/* Note if an I-stream sync is also needed. */
	if (pte_bits & PG_EXEC) {
		TLB_COUNT(shootdown_all_user_imb);
		TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB);
	}

	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV) {
		if (tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap) {
			if (tlbctx->t_pmap == NULL) {
				pmap_reference(pmap);
				tlbctx->t_pmap = pmap;
			}
		} else {
			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_MULTI);
		}
	} else {
		KASSERT(tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap);
		tlbctx->t_pmap = pmap;
	}

	TLB_CTX_SET_ALLVA(tlbctx);
}

static void
pmap_tlb_shootdown_pv(pmap_t const pmap, vaddr_t const va,
    pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
{

	KASSERT(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV);

	TLB_COUNT(shootdown_pv);

	if (tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap) {
		if (tlbctx->t_pmap == NULL) {
			pmap_reference(pmap);
			tlbctx->t_pmap = pmap;
		}
		pmap_tlb_shootdown_internal(pmap, va, pte_bits, tlbctx);
	} else {
		TLB_COUNT(shootdown_pv_multi);
		uintptr_t flags = TLB_CTX_F_MULTI;
		if (pmap == pmap_kernel()) {
			KASSERT(pte_bits & PG_ASM);
			flags |= TLB_CTX_F_ASM;
		} else {
			KASSERT((pte_bits & PG_ASM) == 0);
		}

		/*
		 * No need to distinguish between kernel and user IMB
		 * here; see pmap_tlb_invalidate_multi().
		 */
		if (pte_bits & PG_EXEC) {
			flags |= TLB_CTX_F_IMB;
		}
		TLB_CTX_SET_ALLVA(tlbctx);
		TLB_CTX_SET_FLAG(tlbctx, flags);
	}
}

static void
pmap_tlb_invalidate_multi(const struct pmap_tlb_context * const tlbctx)
{
	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) {
		TLB_COUNT(invalidate_multi_tbia);
		ALPHA_TBIA();
	} else {
		TLB_COUNT(invalidate_multi_tbiap);
		ALPHA_TBIAP();
	}
	if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_IMB | TLB_CTX_F_KIMB)) {
		TLB_COUNT(invalidate_multi_imb);
		alpha_pal_imb();
	}
}

static void
pmap_tlb_invalidate_kernel(const struct pmap_tlb_context * const tlbctx)
{
	const uintptr_t count = TLB_CTX_COUNT(tlbctx);

	if (count == TLB_CTX_ALLVA) {
		TLB_COUNT(invalidate_kern_tbia);
		ALPHA_TBIA();
	} else {
		TLB_COUNT(invalidate_kern_tbis);
		for (uintptr_t i = 0; i < count; i++) {
			ALPHA_TBIS(TLB_CTX_VA(tlbctx, i));
		}
	}
	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_KIMB) {
		TLB_COUNT(invalidate_kern_imb);
		alpha_pal_imb();
	}
}

static void
pmap_tlb_invalidate(const struct pmap_tlb_context * const tlbctx,
    const struct cpu_info * const ci)
{
	const uintptr_t count = TLB_CTX_COUNT(tlbctx);

	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_MULTI) {
		pmap_tlb_invalidate_multi(tlbctx);
		return;
	}

	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) {
		pmap_tlb_invalidate_kernel(tlbctx);
		return;
	}

	KASSERT(kpreempt_disabled());

	pmap_t const pmap = tlbctx->t_pmap;
	KASSERT(pmap != NULL);

	if (__predict_false(pmap != ci->ci_pmap)) {
		TLB_COUNT(invalidate_user_not_current);

		/*
		 * For CPUs that don't implement ASNs, the SWPCTX call
		 * does all of the TLB invalidation work for us.
		 */
		if (__predict_false(pmap_max_asn == 0)) {
			return;
		}

		const u_long cpu_mask = 1UL << ci->ci_cpuid;

		/*
		 * We cannot directly invalidate the TLB in this case,
		 * so force allocation of a new ASN when the pmap becomes
		 * active again.
		 */
		pmap->pm_percpu[ci->ci_cpuid].pmc_asngen = PMAP_ASNGEN_INVALID;
		atomic_and_ulong(&pmap->pm_cpus, ~cpu_mask);

		/*
		 * This isn't strictly necessary; when we allocate a
		 * new ASN, we're going to clear this bit and skip
		 * syncing the I-stream.  But we will keep this bit
		 * of accounting for internal consistency.
		 */
		if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) {
			pmap->pm_percpu[ci->ci_cpuid].pmc_needisync = 1;
		}
		return;
	}

	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) {
		TLB_COUNT(invalidate_user_lazy_imb);
		pmap->pm_percpu[ci->ci_cpuid].pmc_needisync = 1;
	}

	if (count == TLB_CTX_ALLVA) {
		/*
		 * Another option here for CPUs that implement ASNs is
		 * to allocate a new ASN and do a SWPCTX.  That's almost
		 * certainly faster than a TBIAP, but would require us
		 * to synchronize against IPIs in pmap_activate().
		 */
		TLB_COUNT(invalidate_user_tbiap);
		KASSERT((TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) == 0);
		ALPHA_TBIAP();
	} else {
		TLB_COUNT(invalidate_user_tbis);
		for (uintptr_t i = 0; i < count; i++) {
			ALPHA_TBIS(TLB_CTX_VA(tlbctx, i));
		}
	}
}

static void
pmap_tlb_shootnow(const struct pmap_tlb_context * const tlbctx)
{

	if (TLB_CTX_COUNT(tlbctx) == 0) {
		/* No work to do. */
		return;
	}

	/*
	 * Acquire the shootdown mutex.  This will also block IPL_VM
	 * interrupts and disable preemption.  It is critically important
	 * that IPIs not be blocked in this routine.
	 */
	KASSERT(alpha_pal_rdps() < ALPHA_PSL_IPL_CLOCK);
	mutex_spin_enter(&tlb_lock);
	tlb_evcnt.ev_count++;

	const struct cpu_info *ci = curcpu();
	const u_long this_cpu = 1UL << ci->ci_cpuid;
	u_long active_cpus;
	bool activation_locked, activation_lock_tried;

	/*
	 * Figure out who to notify.  If it's for the kernel or
	 * multiple address spaces, we notify everybody.  If
	 * it's a single user pmap, then we try to acquire the
	 * activation lock so we can get an accurate accounting
	 * of who needs to be notified.  If we can't acquire
	 * the activation lock, then just notify everyone and
	 * let them sort it out when they process the IPI.
	 */
	if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_ASM | TLB_CTX_F_MULTI)) {
		active_cpus = pmap_all_cpus();
		activation_locked = false;
		activation_lock_tried = false;
	} else {
		KASSERT(tlbctx->t_pmap != NULL);
		activation_locked = PMAP_ACT_TRYLOCK(tlbctx->t_pmap);
		if (__predict_true(activation_locked)) {
			active_cpus = tlbctx->t_pmap->pm_cpus;
		} else {
			TLB_COUNT(shootnow_over_notify);
			active_cpus = pmap_all_cpus();
		}
		activation_lock_tried = true;
	}

#if defined(MULTIPROCESSOR)
	/*
	 * If there are remote CPUs that need to do work, get them
	 * started now.
	 */
	const u_long remote_cpus = active_cpus & ~this_cpu;
	KASSERT(tlb_context == NULL);
	if (remote_cpus) {
		TLB_COUNT(shootnow_remote);
		tlb_context = tlbctx;
		tlb_pending = remote_cpus;
		alpha_multicast_ipi(remote_cpus, ALPHA_IPI_SHOOTDOWN);
	}
#endif /* MULTIPROCESSOR */

	/*
	 * Now that the remotes have been notified, release the
	 * activation lock.
	 */
	if (activation_lock_tried) {
		if (activation_locked) {
			KASSERT(tlbctx->t_pmap != NULL);
			PMAP_ACT_UNLOCK(tlbctx->t_pmap);
		}
		/*
		 * When we tried to acquire the activation lock, we
		 * raised IPL to IPL_SCHED (even if we ultimately
		 * failed to acquire the lock), which blocks out IPIs.
		 * Force our IPL back down to IPL_VM so that we can
		 * receive IPIs.
		 */
		alpha_pal_swpipl(IPL_VM);
	}

	/*
	 * Do any work that we might need to do.  We don't need to
	 * synchronize with activation here because we know that
	 * for the current CPU, activation status will not change.
	 */
	if (active_cpus & this_cpu) {
		pmap_tlb_invalidate(tlbctx, ci);
	}

#if defined(MULTIPROCESSOR)
	/* Wait for remote CPUs to finish. */
	if (remote_cpus) {
		int backoff = SPINLOCK_BACKOFF_MIN;
		u_int spins = 0;

		while (atomic_load_acquire(&tlb_context) != NULL) {
			SPINLOCK_BACKOFF(backoff);
			if (spins++ > 0x0fffffff) {
				printf("TLB LOCAL MASK  = 0x%016lx\n",
				    this_cpu);
				printf("TLB REMOTE MASK = 0x%016lx\n",
				    remote_cpus);
				printf("TLB REMOTE PENDING = 0x%016lx\n",
				    tlb_pending);
				printf("TLB CONTEXT = %p\n", tlb_context);
				printf("TLB LOCAL IPL = %lu\n",
				    alpha_pal_rdps());
				panic("pmap_tlb_shootnow");
			}
		}
	}
	KASSERT(tlb_context == NULL);
#endif /* MULTIPROCESSOR */

	mutex_spin_exit(&tlb_lock);

	if (__predict_false(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV)) {
		/*
		 * P->V TLB operations may operate on multiple pmaps.
		 * The shootdown takes a reference on the first pmap it
		 * encounters, in order to prevent it from disappearing,
		 * in the hope that we end up with a single-pmap P->V
		 * operation (instrumentation shows this is not rare).
		 *
		 * Once this shootdown is finished globally, we need to
		 * release this extra reference.
		 */
		KASSERT(tlbctx->t_pmap != NULL);
		pmap_destroy(tlbctx->t_pmap);
	}
}

#if defined(MULTIPROCESSOR)
void
pmap_tlb_shootdown_ipi(struct cpu_info * const ci,
    struct trapframe * const tf __unused)
{
	KASSERT(tlb_context != NULL);
	pmap_tlb_invalidate(tlb_context, ci);
	if (atomic_and_ulong_nv(&tlb_pending, ~(1UL << ci->ci_cpuid)) == 0) {
		atomic_store_release(&tlb_context, NULL);
	}
}
#endif /* MULTIPROCESSOR */

static inline void
pmap_tlb_context_drain(struct pmap_tlb_context * const tlbctx)
{
	if (! LIST_EMPTY(&tlbctx->t_freeptq)) {
		pmap_pagelist_free(&tlbctx->t_freeptq);
	}
	if (! LIST_EMPTY(&tlbctx->t_freepvq)) {
		pmap_pvlist_free(&tlbctx->t_freepvq);
	}
}

/*
 * ASN management functions.
 */
static u_int	pmap_asn_alloc(pmap_t, struct cpu_info *);

/*
 * Misc. functions.
 */
static struct vm_page *pmap_physpage_alloc(int);
static void	pmap_physpage_free(paddr_t);
static int	pmap_physpage_addref(void *);
static int	pmap_physpage_delref(void *);

static bool	vtophys_internal(vaddr_t, paddr_t *p);

/*
 * PMAP_KERNEL_PTE:
 *
 *	Get a kernel PTE.
 *
 *	If debugging, do a table walk.  If not debugging, just use
 *	the Virtual Page Table, since all kernel page tables are
 *	pre-allocated and mapped in.
 */
#ifdef DEBUG
#define	PMAP_KERNEL_PTE(va)						\
({									\
	pt_entry_t *l1pte_, *l2pte_;					\
									\
	l1pte_ = pmap_l1pte(kernel_lev1map, va);			\
	if (pmap_pte_v(l1pte_) == 0) {					\
		printf("kernel level 1 PTE not valid, va 0x%lx "	\
		    "(line %d) pte=%p *pte=0x%016lx\n", (va), __LINE__,	\
		    l1pte_, *l1pte_);					\
		panic("PMAP_KERNEL_PTE");				\
	}								\
	l2pte_ = pmap_l2pte(kernel_lev1map, va, l1pte_);		\
	if (pmap_pte_v(l2pte_) == 0) {					\
		printf("kernel level 2 PTE not valid, va 0x%lx "	\
		    "(line %d) pte=%p *pte=0x%016lx\n", (va), __LINE__,	\
		    l2pte_, *l2pte_);					\
		panic("PMAP_KERNEL_PTE");				\
	}								\
	pmap_l3pte(kernel_lev1map, va, l2pte_);				\
})
#else
#define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
#endif

/*
 * PMAP_STAT_{INCR,DECR}:
 *
 *	Increment or decrement a pmap statistic.
 */
#define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
#define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))

/*
 * pmap_init_cpu:
 *
 *	Initialize pmap data in the cpu_info.
 */
void
pmap_init_cpu(struct cpu_info * const ci)
{
	pmap_t const pmap = pmap_kernel();

	/* All CPUs start out using the kernel pmap. */
	atomic_or_ulong(&pmap->pm_cpus, 1UL << ci->ci_cpuid);
	pmap_reference(pmap);
	ci->ci_pmap = pmap;

	/* Initialize ASN allocation logic. */
	ci->ci_next_asn = PMAP_ASN_FIRST_USER;
	ci->ci_asn_gen = PMAP_ASNGEN_INITIAL;
}

/*
 * pmap_bootstrap:
 *
 *	Bootstrap the system to run with virtual memory.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
{
	vsize_t lev2mapsize, lev3mapsize;
	pt_entry_t *lev2map, *lev3map;
	pt_entry_t pte;
	vsize_t bufsz;
	struct pcb *pcb;
	int i;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
#endif

	/*
	 * Compute the number of pages kmem_arena will have.
	 */
	kmeminit_nkmempages();

	/*
	 * Figure out how many initial PTE's are necessary to map the
	 * kernel.  We also reserve space for kmem_alloc_pageable()
	 * for vm_fork().
	 */

	/* Get size of buffer cache and set an upper limit */
	bufsz = buf_memcalc();
	buf_setvalimit(bufsz);

	lev3mapsize =
		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
		(maxproc * UPAGES) + nkmempages;

	lev3mapsize = roundup(lev3mapsize, NPTEPG);

	/*
	 * Initialize `FYI' variables.  Note we're relying on
	 * the fact that BSEARCH sorts the vm_physmem[] array
	 * for us.
	 */
	avail_start = ptoa(uvm_physseg_get_avail_start(uvm_physseg_get_first()));
	avail_end = ptoa(uvm_physseg_get_avail_end(uvm_physseg_get_last()));
	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;

#if 0
	printf("avail_start = 0x%lx\n", avail_start);
	printf("avail_end = 0x%lx\n", avail_end);
	printf("virtual_end = 0x%lx\n", virtual_end);
#endif

	/*
	 * Allocate a level 1 PTE table for the kernel.
	 * This is always one page long.
	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
	 */
	kernel_lev1map = (pt_entry_t *)
	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);

	/*
	 * Allocate a level 2 PTE table for the kernel.
	 * These must map all of the level3 PTEs.
	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
	 */
	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
	lev2map = (pt_entry_t *)
	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);

	/*
	 * Allocate a level 3 PTE table for the kernel.
	 * Contains lev3mapsize PTEs.
	 */
	lev3map = (pt_entry_t *)
	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);

	/*
	 * Set up level 1 page table
	 */

	/* Map all of the level 2 pte pages */
	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
	}

	/* Map the virtual page table */
	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
	    << PG_SHIFT;
	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
	VPT = (pt_entry_t *)VPTBASE;

	/*
	 * Set up level 2 page table.
	 */
	/* Map all of the level 3 pte pages */
	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
		/*
		 * No need to use l2pte_index() here; it's equivalent
		 * to just indexing with our loop variable i, but will
		 * fall over if we end up with more than 1 L2 PT page.
		 *
		 * In other words:
		 *
		 *	l2pte_index(VM_MIN_KERNEL_ADDRESS +
		 *	    (i*PAGE_SIZE*NPTEPG))
		 *
		 * ...is the same as 'i' so long as i stays below 1024.
		 */
		lev2map[i] = pte;
	}

	/* Initialize the pmap_growkernel_lock. */
	rw_init(&pmap_growkernel_lock);

	/*
	 * Set up level three page table (lev3map)
	 */
	/* Nothing to do; it's already zero'd */

	/*
	 * Initialize the pmap pools and list.
	 */
	pmap_ncpuids = ncpuids;
	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids),
	    COHERENCY_UNIT, 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
	    NULL, NULL);

	TAILQ_INIT(&pmap_all_pmaps);

	/* Initialize the ASN logic.  See also pmap_init_cpu(). */
	pmap_max_asn = maxasn;

	/*
	 * Initialize the locks.
	 */
	rw_init(&pmap_main_lock);
	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
	}
	for (i = 0; i < __arraycount(pmap_pmap_locks); i++) {
		mutex_init(&pmap_pmap_locks[i].locks.lock,
		    MUTEX_DEFAULT, IPL_NONE);
		mutex_init(&pmap_pmap_locks[i].locks.activation_lock,
		    MUTEX_SPIN, IPL_SCHED);
	}

	/*
	 * This must block any interrupt from which a TLB shootdown
	 * could be issued, but must NOT block IPIs.
	 */
	mutex_init(&tlb_lock, MUTEX_SPIN, IPL_VM);

	/*
	 * Initialize kernel pmap.  Note that all kernel mappings
	 * have PG_ASM set, so the ASN doesn't really matter for
	 * the kernel pmap.  Also, since the kernel pmap always
	 * references kernel_lev1map, it always has an invalid ASN
	 * generation.
	 */
	memset(pmap_kernel(), 0, sizeof(struct pmap));
	LIST_INIT(&pmap_kernel()->pm_ptpages);
	LIST_INIT(&pmap_kernel()->pm_pvents);
	atomic_store_relaxed(&pmap_kernel()->pm_count, 1);
	/* Kernel pmap does not have per-CPU info. */
	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);

	/*
	 * Set up lwp0's PCB such that the ptbr points to the right place
	 * and has the kernel pmap's (really unused) ASN.
	 */
	pcb = lwp_getpcb(&lwp0);
	pcb->pcb_hw.apcb_ptbr =
	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
	pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL;

	struct cpu_info * const ci = curcpu();
	pmap_init_cpu(ci);
}

/*
 * pmap_virtual_space:		[ INTERFACE ]
 *
 *	Define the initial bounds of the kernel virtual address space.
 */
void
pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
{

	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
}

/*
 * pmap_steal_memory:		[ INTERFACE ]
 *
 *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
 *	This function allows for early dynamic memory allocation until the
 *	virtual memory system has been bootstrapped.  After that point, either
 *	kmem_alloc or malloc should be used.  This function works by stealing
 *	pages from the (to be) managed page pool, then implicitly mapping the
 *	pages (by using their k0seg addresses) and zeroing them.
 *
 *	It may be used once the physical memory segments have been pre-loaded
 *	into the vm_physmem[] array.  Early memory allocation MUST use this
 *	interface!  This cannot be used after vm_page_startup(), and will
 *	generate a panic if tried.
 *
 *	Note that this memory will never be freed, and in essence it is wired
 *	down.
 *
 *	We must adjust *vstartp and/or *vendp iff we use address space
 *	from the kernel virtual address range defined by pmap_virtual_space().
 *
 *	Note: no locking is necessary in this function.
 */
vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
{
	int npgs;
	vaddr_t va;
	paddr_t pa;

	uvm_physseg_t bank;

	size = round_page(size);
	npgs = atop(size);

#if 0
	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
#endif

	for (bank = uvm_physseg_get_first();
	     uvm_physseg_valid_p(bank);
	     bank = uvm_physseg_get_next(bank)) {
		if (uvm.page_init_done == true)
			panic("pmap_steal_memory: called _after_ bootstrap");

#if 0
		printf("     bank %d: avail_start 0x%"PRIxPADDR", start 0x%"PRIxPADDR", "
		    "avail_end 0x%"PRIxPADDR"\n", bank, uvm_physseg_get_avail_start(bank),
		    uvm_physseg_get_start(bank), uvm_physseg_get_avail_end(bank));
#endif

		if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank) ||
		    uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank))
			continue;

#if 0
		printf("             avail_end - avail_start = 0x%"PRIxPADDR"\n",
		    uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank));
#endif

		if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank)
		    < npgs)
			continue;

		/*
		 * There are enough pages here; steal them!
		 */
		pa = ptoa(uvm_physseg_get_start(bank));
		uvm_physseg_unplug(atop(pa), npgs);

		va = ALPHA_PHYS_TO_K0SEG(pa);
		memset((void *)va, 0, size);
		pmap_pages_stolen += npgs;
		return (va);
	}

	/*
	 * If we got here, there was no memory left.
	 */
	panic("pmap_steal_memory: no memory to steal");
}

/*
 * pmap_init:			[ INTERFACE ]
 *
 *	Initialize the pmap module.  Called by vm_init(), to initialize any
 *	structures that the pmap system needs to map virtual memory.
 *
 *	Note: no locking is necessary in this function.
 */
void
pmap_init(void)
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_init()\n");
#endif

	/* initialize protection array */
	alpha_protection_init();

	/* Initialize TLB handling. */
	pmap_tlb_init();

	/* Instrument pmap_growkernel(). */
	evcnt_attach_dynamic_nozero(&pmap_growkernel_evcnt, EVCNT_TYPE_MISC,
	    NULL, "pmap", "growkernel");

	/*
	 * Set a low water mark on the pv_entry pool, so that we are
	 * more likely to have these around even in extreme memory
	 * starvation.
	 */
	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);

	/*
	 * Now it is safe to enable pv entry recording.
	 */
	pmap_initialized = true;

#if 0
	for (uvm_physseg_t bank = uvm_physseg_get_first();
	     uvm_physseg_valid_p(bank);
	     bank = uvm_physseg_get_next(bank)) {
		printf("bank %d\n", bank);
		printf("\tstart = 0x%lx\n", ptoa(uvm_physseg_get_start(bank)));
		printf("\tend = 0x%lx\n", ptoa(uvm_physseg_get_end(bank)));
		printf("\tavail_start = 0x%lx\n",
		    ptoa(uvm_physseg_get_avail_start(bank)));
		printf("\tavail_end = 0x%lx\n",
		    ptoa(uvm_physseg_get_avail_end(bank)));
	}
#endif
}

/*
 * pmap_create:			[ INTERFACE ]
 *
 *	Create and return a physical map.
 *
 *	Note: no locking is necessary in this function.
 */
pmap_t
pmap_create(void)
{
	pmap_t pmap;
	pt_entry_t *lev1map;
	int i;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_create()\n");
#endif

	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
	memset(pmap, 0, sizeof(*pmap));
	LIST_INIT(&pmap->pm_ptpages);
	LIST_INIT(&pmap->pm_pvents);

	atomic_store_relaxed(&pmap->pm_count, 1);

 try_again:
	rw_enter(&pmap_growkernel_lock, RW_READER);

	lev1map = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
	if (__predict_false(lev1map == NULL)) {
		rw_exit(&pmap_growkernel_lock);
		(void) kpause("pmap_create", false, hz >> 2, NULL);
		goto try_again;
	}

	/*
	 * There are only kernel mappings at this point; give the pmap
	 * the kernel ASN.  This will be initialized to correct values
	 * when the pmap is activated.
	 *
	 * We stash a pointer to the pmap's lev1map in each CPU's
	 * private data.  It remains constant for the life of the
	 * pmap, and gives us more room in the shared pmap structure.
	 */
	for (i = 0; i < pmap_ncpuids; i++) {
		pmap->pm_percpu[i].pmc_asn = PMAP_ASN_KERNEL;
		pmap->pm_percpu[i].pmc_asngen = PMAP_ASNGEN_INVALID;
		pmap->pm_percpu[i].pmc_lev1map = lev1map;
	}

	mutex_enter(&pmap_all_pmaps_lock);
	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
	mutex_exit(&pmap_all_pmaps_lock);

	rw_exit(&pmap_growkernel_lock);

	return (pmap);
}

/*
 * pmap_destroy:		[ INTERFACE ]
 *
 *	Drop the reference count on the specified pmap, releasing
 *	all resources if the reference count drops to zero.
 */
void
pmap_destroy(pmap_t pmap)
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_destroy(%p)\n", pmap);
#endif

	PMAP_MP(membar_release());
	KASSERT(atomic_load_relaxed(&pmap->pm_count) > 0);
	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
		return;
	PMAP_MP(membar_acquire());

	pt_entry_t *lev1map = pmap_lev1map(pmap);

	rw_enter(&pmap_growkernel_lock, RW_READER);

	/*
	 * Remove it from the global list of all pmaps.
	 */
	mutex_enter(&pmap_all_pmaps_lock);
	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
	mutex_exit(&pmap_all_pmaps_lock);

	pool_cache_put(&pmap_l1pt_cache, lev1map);
#ifdef DIAGNOSTIC
	int i;
	for (i = 0; i < pmap_ncpuids; i++) {
		pmap->pm_percpu[i].pmc_lev1map = (pt_entry_t *)0xdeadbeefUL;
	}
#endif /* DIAGNOSTIC */

	rw_exit(&pmap_growkernel_lock);

	pool_cache_put(&pmap_pmap_cache, pmap);
}

/*
 * pmap_reference:		[ INTERFACE ]
 *
 *	Add a reference to the specified pmap.
1709 */ 1710 void 1711 pmap_reference(pmap_t pmap) 1712 { 1713 unsigned int newcount __diagused; 1714 1715 #ifdef DEBUG 1716 if (pmapdebug & PDB_FOLLOW) 1717 printf("pmap_reference(%p)\n", pmap); 1718 #endif 1719 1720 newcount = atomic_inc_uint_nv(&pmap->pm_count); 1721 KASSERT(newcount != 0); 1722 } 1723 1724 /* 1725 * pmap_remove: [ INTERFACE ] 1726 * 1727 * Remove the given range of addresses from the specified map. 1728 * 1729 * It is assumed that the start and end are properly 1730 * rounded to the page size. 1731 */ 1732 static void 1733 pmap_remove_internal(pmap_t pmap, vaddr_t sva, vaddr_t eva, 1734 struct pmap_tlb_context * const tlbctx) 1735 { 1736 pt_entry_t *l1pte, *l2pte, *l3pte; 1737 pt_entry_t *saved_l2pte, *saved_l3pte; 1738 vaddr_t l1eva, l2eva, l3vptva; 1739 pt_entry_t pte_bits; 1740 1741 #ifdef DEBUG 1742 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 1743 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva); 1744 #endif 1745 1746 /* 1747 * If this is the kernel pmap, we can use a faster method 1748 * for accessing the PTEs (since the PT pages are always 1749 * resident). 1750 * 1751 * Note that this routine should NEVER be called from an 1752 * interrupt context; pmap_kremove() is used for that. 1753 */ 1754 if (pmap == pmap_kernel()) { 1755 PMAP_MAP_TO_HEAD_LOCK(); 1756 PMAP_LOCK(pmap); 1757 1758 while (sva < eva) { 1759 l3pte = PMAP_KERNEL_PTE(sva); 1760 if (pmap_pte_v(l3pte)) { 1761 pte_bits = pmap_remove_mapping(pmap, sva, 1762 l3pte, true, NULL, tlbctx); 1763 pmap_tlb_shootdown(pmap, sva, pte_bits, 1764 tlbctx); 1765 } 1766 sva += PAGE_SIZE; 1767 } 1768 1769 PMAP_MAP_TO_HEAD_UNLOCK(); 1770 PMAP_UNLOCK(pmap); 1771 pmap_tlb_shootnow(tlbctx); 1772 /* kernel PT pages are never freed. */ 1773 KASSERT(LIST_EMPTY(&tlbctx->t_freeptq)); 1774 /* ...but we might have freed PV entries. */ 1775 pmap_tlb_context_drain(tlbctx); 1776 TLB_COUNT(reason_remove_kernel); 1777 1778 return; 1779 } 1780 1781 pt_entry_t * const lev1map = pmap_lev1map(pmap); 1782 1783 KASSERT(sva < VM_MAXUSER_ADDRESS); 1784 KASSERT(eva <= VM_MAXUSER_ADDRESS); 1785 KASSERT(lev1map != kernel_lev1map); 1786 1787 PMAP_MAP_TO_HEAD_LOCK(); 1788 PMAP_LOCK(pmap); 1789 1790 l1pte = pmap_l1pte(lev1map, sva); 1791 1792 for (; sva < eva; sva = l1eva, l1pte++) { 1793 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 1794 if (pmap_pte_v(l1pte)) { 1795 saved_l2pte = l2pte = pmap_l2pte(lev1map, sva, l1pte); 1796 1797 /* 1798 * Add a reference to the L2 table so it won't 1799 * get removed from under us. 1800 */ 1801 pmap_physpage_addref(saved_l2pte); 1802 1803 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 1804 l2eva = 1805 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 1806 if (pmap_pte_v(l2pte)) { 1807 saved_l3pte = l3pte = 1808 pmap_l3pte(lev1map, sva, l2pte); 1809 1810 /* 1811 * Add a reference to the L3 table so 1812 * it won't get removed from under us. 1813 */ 1814 pmap_physpage_addref(saved_l3pte); 1815 1816 /* 1817 * Remember this sva; if the L3 table 1818 * gets removed, we need to invalidate 1819 * the VPT TLB entry for it. 1820 */ 1821 l3vptva = sva; 1822 1823 for (; sva < l2eva && sva < eva; 1824 sva += PAGE_SIZE, l3pte++) { 1825 if (!pmap_pte_v(l3pte)) { 1826 continue; 1827 } 1828 pte_bits = 1829 pmap_remove_mapping( 1830 pmap, sva, 1831 l3pte, true, 1832 NULL, tlbctx); 1833 pmap_tlb_shootdown(pmap, 1834 sva, pte_bits, tlbctx); 1835 } 1836 1837 /* 1838 * Remove the reference to the L3 1839 * table that we added above. This 1840 * may free the L3 table. 
1841 */ 1842 pmap_l3pt_delref(pmap, l3vptva, 1843 saved_l3pte, tlbctx); 1844 } 1845 } 1846 1847 /* 1848 * Remove the reference to the L2 table that we 1849 * added above. This may free the L2 table. 1850 */ 1851 pmap_l2pt_delref(pmap, l1pte, saved_l2pte, tlbctx); 1852 } 1853 } 1854 1855 PMAP_MAP_TO_HEAD_UNLOCK(); 1856 PMAP_UNLOCK(pmap); 1857 pmap_tlb_shootnow(tlbctx); 1858 pmap_tlb_context_drain(tlbctx); 1859 TLB_COUNT(reason_remove_user); 1860 } 1861 1862 void 1863 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva) 1864 { 1865 struct pmap_tlb_context tlbctx; 1866 1867 pmap_tlb_context_init(&tlbctx, 0); 1868 pmap_remove_internal(pmap, sva, eva, &tlbctx); 1869 } 1870 1871 /* 1872 * pmap_remove_all: [ INTERFACE ] 1873 * 1874 * Remove all mappings from a pmap in bulk. This is only called 1875 * when it's known that the address space is no longer visible to 1876 * any user process (e.g. during exit or exec). 1877 */ 1878 bool 1879 pmap_remove_all(pmap_t pmap) 1880 { 1881 struct pmap_tlb_context tlbctx; 1882 struct vm_page *pg; 1883 pv_entry_t pv; 1884 1885 KASSERT(pmap != pmap_kernel()); 1886 1887 /* 1888 * This process is pretty simple: 1889 * 1890 * ==> (1) Zero out the user-space portion of the lev1map. 1891 * 1892 * ==> (2) Copy the PT page list to the tlbctx and re-init. 1893 * 1894 * ==> (3) Walk the PV entry list and remove each entry. 1895 * 1896 * ==> (4) Zero the wired and resident count. 1897 * 1898 * Once we've done that, we just need to free everything 1899 * back to the system. 1900 */ 1901 1902 pmap_tlb_context_init(&tlbctx, 0); 1903 1904 PMAP_MAP_TO_HEAD_LOCK(); 1905 PMAP_LOCK(pmap); 1906 1907 /* Step 1 */ 1908 pt_entry_t * const lev1map = pmap_lev1map(pmap); 1909 memset(lev1map, 0, 1910 l1pte_index(VM_MAXUSER_ADDRESS) * sizeof(pt_entry_t)); 1911 1912 /* Step 2 */ 1913 LIST_MOVE(&pmap->pm_ptpages, &tlbctx.t_freeptq, pageq.list); 1914 1915 /* Fix up the reference count on the lev1map page. */ 1916 pg = PHYS_TO_VM_PAGE(ALPHA_K0SEG_TO_PHYS((vaddr_t)lev1map)); 1917 PHYSPAGE_REFCNT_SET(pg, 0); 1918 1919 /* Step 3 */ 1920 while ((pv = LIST_FIRST(&pmap->pm_pvents)) != NULL) { 1921 KASSERT(pv->pv_pmap == pmap); 1922 pmap_pv_remove(pmap, PHYS_TO_VM_PAGE(pmap_pte_pa(pv->pv_pte)), 1923 pv->pv_va, true, NULL, &tlbctx); 1924 } 1925 1926 /* Step 4 */ 1927 atomic_store_relaxed(&pmap->pm_stats.wired_count, 0); 1928 atomic_store_relaxed(&pmap->pm_stats.resident_count, 0); 1929 1930 pmap_tlb_shootdown_all_user(pmap, PG_EXEC, &tlbctx); 1931 1932 PMAP_UNLOCK(pmap); 1933 PMAP_MAP_TO_HEAD_UNLOCK(); 1934 1935 pmap_tlb_shootnow(&tlbctx); 1936 pmap_tlb_context_drain(&tlbctx); 1937 TLB_COUNT(reason_remove_all_user); 1938 1939 return true; 1940 } 1941 1942 /* 1943 * pmap_page_protect: [ INTERFACE ] 1944 * 1945 * Lower the permission for all mappings to a given page to 1946 * the permissions specified. 
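 *
 *	For illustration only, a sketch of how the VM system typically
 *	uses this interface (these call sites are not part of this file):
 *
 *		pmap_page_protect(pg, VM_PROT_READ);	write-protect every
 *							mapping of pg, e.g.
 *							for copy-on-write
 *
 *		pmap_page_protect(pg, VM_PROT_NONE);	remove every mapping
 *							of pg, e.g. before
 *							the page is freed
 *
 *	Requests that keep both read and write permission are a no-op,
 *	as the switch below shows.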
1947 */ 1948 void 1949 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 1950 { 1951 pv_entry_t pv, nextpv; 1952 pt_entry_t opte; 1953 kmutex_t *lock; 1954 struct pmap_tlb_context tlbctx; 1955 1956 #ifdef DEBUG 1957 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) || 1958 (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE))) 1959 printf("pmap_page_protect(%p, %x)\n", pg, prot); 1960 #endif 1961 1962 pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV); 1963 1964 switch (prot) { 1965 case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE: 1966 case VM_PROT_READ|VM_PROT_WRITE: 1967 return; 1968 1969 /* copy_on_write */ 1970 case VM_PROT_READ|VM_PROT_EXECUTE: 1971 case VM_PROT_READ: 1972 PMAP_HEAD_TO_MAP_LOCK(); 1973 lock = pmap_pvh_lock(pg); 1974 mutex_enter(lock); 1975 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 1976 PMAP_LOCK(pv->pv_pmap); 1977 opte = atomic_load_relaxed(pv->pv_pte); 1978 if (opte & (PG_KWE | PG_UWE)) { 1979 atomic_store_relaxed(pv->pv_pte, 1980 opte & ~(PG_KWE | PG_UWE)); 1981 pmap_tlb_shootdown_pv(pv->pv_pmap, pv->pv_va, 1982 opte, &tlbctx); 1983 } 1984 PMAP_UNLOCK(pv->pv_pmap); 1985 } 1986 mutex_exit(lock); 1987 PMAP_HEAD_TO_MAP_UNLOCK(); 1988 pmap_tlb_shootnow(&tlbctx); 1989 TLB_COUNT(reason_page_protect_read); 1990 return; 1991 1992 /* remove_all */ 1993 default: 1994 break; 1995 } 1996 1997 PMAP_HEAD_TO_MAP_LOCK(); 1998 lock = pmap_pvh_lock(pg); 1999 mutex_enter(lock); 2000 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = nextpv) { 2001 pt_entry_t pte_bits; 2002 pmap_t pmap; 2003 vaddr_t va; 2004 2005 nextpv = pv->pv_next; 2006 2007 PMAP_LOCK(pv->pv_pmap); 2008 pmap = pv->pv_pmap; 2009 va = pv->pv_va; 2010 pte_bits = pmap_remove_mapping(pmap, va, pv->pv_pte, 2011 false, NULL, &tlbctx); 2012 pmap_tlb_shootdown_pv(pmap, va, pte_bits, &tlbctx); 2013 PMAP_UNLOCK(pv->pv_pmap); 2014 } 2015 mutex_exit(lock); 2016 PMAP_HEAD_TO_MAP_UNLOCK(); 2017 pmap_tlb_shootnow(&tlbctx); 2018 pmap_tlb_context_drain(&tlbctx); 2019 TLB_COUNT(reason_page_protect_none); 2020 } 2021 2022 /* 2023 * pmap_protect: [ INTERFACE ] 2024 * 2025 * Set the physical protection on the specified range of this map 2026 * as requested. 
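 *
 *	Illustrative sketch (hypothetical, page-aligned va and len):
 *	write-protecting a range of a user pmap, and the degenerate case
 *	of removing it outright:
 *
 *		pmap_protect(pm, va, va + len, VM_PROT_READ);
 *		pmap_protect(pm, va, va + len, VM_PROT_NONE);
 *
 *	The second call is handed to pmap_remove_internal(), since a
 *	range that loses read permission is simply removed.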
2027 */ 2028 void 2029 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 2030 { 2031 pt_entry_t *l1pte, *l2pte, *l3pte, opte; 2032 vaddr_t l1eva, l2eva; 2033 struct pmap_tlb_context tlbctx; 2034 2035 #ifdef DEBUG 2036 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) 2037 printf("pmap_protect(%p, %lx, %lx, %x)\n", 2038 pmap, sva, eva, prot); 2039 #endif 2040 2041 pmap_tlb_context_init(&tlbctx, 0); 2042 2043 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2044 pmap_remove_internal(pmap, sva, eva, &tlbctx); 2045 return; 2046 } 2047 2048 const pt_entry_t bits = pte_prot(pmap, prot); 2049 pt_entry_t * const lev1map = pmap_lev1map(pmap); 2050 2051 PMAP_LOCK(pmap); 2052 2053 l1pte = pmap_l1pte(lev1map, sva); 2054 for (; sva < eva; sva = l1eva, l1pte++) { 2055 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE; 2056 if (pmap_pte_v(l1pte)) { 2057 l2pte = pmap_l2pte(lev1map, sva, l1pte); 2058 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) { 2059 l2eva = 2060 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE; 2061 if (pmap_pte_v(l2pte)) { 2062 l3pte = pmap_l3pte(lev1map, sva, l2pte); 2063 for (; sva < l2eva && sva < eva; 2064 sva += PAGE_SIZE, l3pte++) { 2065 if (pmap_pte_v(l3pte) && 2066 pmap_pte_prot_chg(l3pte, 2067 bits)) { 2068 opte = atomic_load_relaxed(l3pte); 2069 pmap_pte_set_prot(l3pte, 2070 bits); 2071 pmap_tlb_shootdown(pmap, 2072 sva, opte, &tlbctx); 2073 } 2074 } 2075 } 2076 } 2077 } 2078 } 2079 2080 PMAP_UNLOCK(pmap); 2081 pmap_tlb_shootnow(&tlbctx); 2082 TLB_COUNT(reason_protect); 2083 } 2084 2085 /* 2086 * pmap_enter_tlb_shootdown: 2087 * 2088 * Carry out a TLB shootdown on behalf of a pmap_enter() 2089 * or a pmap_kenter_pa(). This is factored out separately 2090 * because we expect it to be not a common case. 2091 */ 2092 static void __noinline 2093 pmap_enter_tlb_shootdown(pmap_t const pmap, vaddr_t const va, 2094 pt_entry_t const pte_bits, bool locked) 2095 { 2096 struct pmap_tlb_context tlbctx; 2097 2098 pmap_tlb_context_init(&tlbctx, 0); 2099 pmap_tlb_shootdown(pmap, va, pte_bits, &tlbctx); 2100 if (locked) { 2101 PMAP_UNLOCK(pmap); 2102 } 2103 pmap_tlb_shootnow(&tlbctx); 2104 } 2105 2106 /* 2107 * pmap_enter_l2pt_delref: 2108 * 2109 * Release a reference on an L2 PT page for pmap_enter(). 2110 * This is factored out separately because we expect it 2111 * to be a rare case. 2112 */ 2113 static void __noinline 2114 pmap_enter_l2pt_delref(pmap_t const pmap, pt_entry_t * const l1pte, 2115 pt_entry_t * const l2pte) 2116 { 2117 struct pmap_tlb_context tlbctx; 2118 2119 /* 2120 * PALcode may have tried to service a TLB miss with 2121 * this L2 PTE, so we need to make sure we don't actually 2122 * free the PT page until we've shot down any TLB entries 2123 * for this VPT index. 2124 */ 2125 2126 pmap_tlb_context_init(&tlbctx, 0); 2127 pmap_l2pt_delref(pmap, l1pte, l2pte, &tlbctx); 2128 PMAP_UNLOCK(pmap); 2129 pmap_tlb_shootnow(&tlbctx); 2130 pmap_tlb_context_drain(&tlbctx); 2131 TLB_COUNT(reason_enter_l2pt_delref); 2132 } 2133 2134 /* 2135 * pmap_enter_l3pt_delref: 2136 * 2137 * Release a reference on an L3 PT page for pmap_enter(). 2138 * This is factored out separately because we expect it 2139 * to be a rare case. 
2140 */ 2141 static void __noinline 2142 pmap_enter_l3pt_delref(pmap_t const pmap, vaddr_t const va, 2143 pt_entry_t * const pte) 2144 { 2145 struct pmap_tlb_context tlbctx; 2146 2147 /* 2148 * PALcode may have tried to service a TLB miss with 2149 * this PTE, so we need to make sure we don't actually 2150 * free the PT page until we've shot down any TLB entries 2151 * for this VPT index. 2152 */ 2153 2154 pmap_tlb_context_init(&tlbctx, 0); 2155 pmap_l3pt_delref(pmap, va, pte, &tlbctx); 2156 PMAP_UNLOCK(pmap); 2157 pmap_tlb_shootnow(&tlbctx); 2158 pmap_tlb_context_drain(&tlbctx); 2159 TLB_COUNT(reason_enter_l3pt_delref); 2160 } 2161 2162 /* 2163 * pmap_enter: [ INTERFACE ] 2164 * 2165 * Insert the given physical page (p) at 2166 * the specified virtual address (v) in the 2167 * target physical map with the protection requested. 2168 * 2169 * If specified, the page will be wired down, meaning 2170 * that the related pte can not be reclaimed. 2171 * 2172 * Note: This is the only routine which MAY NOT lazy-evaluate 2173 * or lose information. That is, this routine must actually 2174 * insert this page into the given map NOW. 2175 */ 2176 int 2177 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2178 { 2179 pt_entry_t *pte, npte, opte; 2180 pv_entry_t opv = NULL; 2181 paddr_t opa; 2182 bool tflush = false; 2183 int error = 0; 2184 kmutex_t *lock; 2185 2186 #ifdef DEBUG 2187 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2188 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n", 2189 pmap, va, pa, prot, flags); 2190 #endif 2191 struct vm_page * const pg = PHYS_TO_VM_PAGE(pa); 2192 const bool wired = (flags & PMAP_WIRED) != 0; 2193 2194 PMAP_MAP_TO_HEAD_LOCK(); 2195 PMAP_LOCK(pmap); 2196 2197 if (pmap == pmap_kernel()) { 2198 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2199 pte = PMAP_KERNEL_PTE(va); 2200 } else { 2201 pt_entry_t *l1pte, *l2pte; 2202 pt_entry_t * const lev1map = pmap_lev1map(pmap); 2203 2204 KASSERT(va < VM_MAXUSER_ADDRESS); 2205 KASSERT(lev1map != kernel_lev1map); 2206 2207 /* 2208 * Check to see if the level 1 PTE is valid, and 2209 * allocate a new level 2 page table page if it's not. 2210 * A reference will be added to the level 2 table when 2211 * the level 3 table is created. 2212 */ 2213 l1pte = pmap_l1pte(lev1map, va); 2214 if (pmap_pte_v(l1pte) == 0) { 2215 pmap_physpage_addref(l1pte); 2216 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT); 2217 if (error) { 2218 pmap_l1pt_delref(pmap, l1pte); 2219 if (flags & PMAP_CANFAIL) 2220 goto out; 2221 panic("pmap_enter: unable to create L2 PT " 2222 "page"); 2223 } 2224 #ifdef DEBUG 2225 if (pmapdebug & PDB_PTPAGE) 2226 printf("pmap_enter: new level 2 table at " 2227 "0x%lx\n", pmap_pte_pa(l1pte)); 2228 #endif 2229 } 2230 2231 /* 2232 * Check to see if the level 2 PTE is valid, and 2233 * allocate a new level 3 page table page if it's not. 2234 * A reference will be added to the level 3 table when 2235 * the mapping is validated. 
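 *
 * (Sketch, for illustration: the full three-level lookup used
 *  throughout this file is
 *
 *	l1pte = pmap_l1pte(lev1map, va);
 *	l2pte = pmap_l2pte(lev1map, va, l1pte);
 *	l3pte = pmap_l3pte(lev1map, va, l2pte);
 *
 *  where each step is only meaningful if the previous level's PTE is
 *  valid.  pmap_enter() performs this same walk, allocating any missing
 *  intermediate PT pages along the way.)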
2236 */ 2237 l2pte = pmap_l2pte(lev1map, va, l1pte); 2238 if (pmap_pte_v(l2pte) == 0) { 2239 pmap_physpage_addref(l2pte); 2240 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT); 2241 if (error) { 2242 /* unlocks pmap */ 2243 pmap_enter_l2pt_delref(pmap, l1pte, l2pte); 2244 if (flags & PMAP_CANFAIL) { 2245 PMAP_LOCK(pmap); 2246 goto out; 2247 } 2248 panic("pmap_enter: unable to create L3 PT " 2249 "page"); 2250 } 2251 #ifdef DEBUG 2252 if (pmapdebug & PDB_PTPAGE) 2253 printf("pmap_enter: new level 3 table at " 2254 "0x%lx\n", pmap_pte_pa(l2pte)); 2255 #endif 2256 } 2257 2258 /* 2259 * Get the PTE that will map the page. 2260 */ 2261 pte = pmap_l3pte(lev1map, va, l2pte); 2262 } 2263 2264 /* Remember all of the old PTE; used for TBI check later. */ 2265 opte = atomic_load_relaxed(pte); 2266 2267 /* 2268 * Check to see if the old mapping is valid. If not, validate the 2269 * new one immediately. 2270 */ 2271 if ((opte & PG_V) == 0) { 2272 /* No TLB invalidations needed for new mappings. */ 2273 2274 if (pmap != pmap_kernel()) { 2275 /* 2276 * New mappings gain a reference on the level 3 2277 * table. 2278 */ 2279 pmap_physpage_addref(pte); 2280 } 2281 goto validate_enterpv; 2282 } 2283 2284 opa = pmap_pte_pa(pte); 2285 2286 if (opa == pa) { 2287 /* 2288 * Mapping has not changed; must be a protection or 2289 * wiring change. 2290 */ 2291 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) { 2292 #ifdef DEBUG 2293 if (pmapdebug & PDB_ENTER) 2294 printf("pmap_enter: wiring change -> %d\n", 2295 wired); 2296 #endif 2297 /* Adjust the wiring count. */ 2298 if (wired) 2299 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2300 else 2301 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2302 } 2303 2304 /* Set the PTE. */ 2305 goto validate; 2306 } 2307 2308 /* 2309 * The mapping has changed. We need to invalidate the 2310 * old mapping before creating the new one. 2311 */ 2312 #ifdef DEBUG 2313 if (pmapdebug & PDB_ENTER) 2314 printf("pmap_enter: removing old mapping 0x%lx\n", va); 2315 #endif 2316 if (pmap != pmap_kernel()) { 2317 /* 2318 * Gain an extra reference on the level 3 table. 2319 * pmap_remove_mapping() will delete a reference, 2320 * and we don't want the table to be erroneously 2321 * freed. 2322 */ 2323 pmap_physpage_addref(pte); 2324 } 2325 /* Already have the bits from opte above. */ 2326 (void) pmap_remove_mapping(pmap, va, pte, true, &opv, NULL); 2327 2328 validate_enterpv: 2329 /* Enter the mapping into the pv_table if appropriate. */ 2330 if (pg != NULL) { 2331 error = pmap_pv_enter(pmap, pg, va, pte, true, opv); 2332 if (error) { 2333 /* This can only fail if opv == NULL */ 2334 KASSERT(opv == NULL); 2335 2336 /* unlocks pmap */ 2337 pmap_enter_l3pt_delref(pmap, va, pte); 2338 if (flags & PMAP_CANFAIL) { 2339 PMAP_LOCK(pmap); 2340 goto out; 2341 } 2342 panic("pmap_enter: unable to enter mapping in PV " 2343 "table"); 2344 } 2345 opv = NULL; 2346 } 2347 2348 /* Increment counters. */ 2349 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 2350 if (wired) 2351 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2352 2353 validate: 2354 /* Build the new PTE. 
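 *
 * Illustrative summary only, not additional code: the PTE assembled
 * below is roughly
 *
 *	npte = ((pa >> PGSHIFT) << PG_SHIFT)	the PFN field
 *	     | pte_prot(pmap, prot)		protection bits
 *	     | PG_V				valid
 *	     | fault-on bits as needed		PG_FOR, PG_FOW, PG_FOE
 *	     | PG_PVLIST			if the page is managed
 *	     | PG_WIRED				if the mapping is wired
 *
 * with the fault-on bits chosen from the page's current PGA_REFERENCED
 * and PGA_MODIFIED attributes, so that the first reference or
 * modification traps into pmap_emulate_reference().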
*/ 2355 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V; 2356 if (pg != NULL) { 2357 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2358 uintptr_t attrs = 0; 2359 2360 KASSERT(((flags & VM_PROT_ALL) & ~prot) == 0); 2361 2362 if (flags & VM_PROT_WRITE) 2363 attrs |= (PGA_REFERENCED|PGA_MODIFIED); 2364 else if (flags & VM_PROT_ALL) 2365 attrs |= PGA_REFERENCED; 2366 2367 lock = pmap_pvh_lock(pg); 2368 mutex_enter(lock); 2369 attrs = (md->pvh_listx |= attrs); 2370 mutex_exit(lock); 2371 2372 /* Set up referenced/modified emulation for new mapping. */ 2373 if ((attrs & PGA_REFERENCED) == 0) 2374 npte |= PG_FOR | PG_FOW | PG_FOE; 2375 else if ((attrs & PGA_MODIFIED) == 0) 2376 npte |= PG_FOW; 2377 2378 /* 2379 * Mapping was entered on PV list. 2380 */ 2381 npte |= PG_PVLIST; 2382 } 2383 if (wired) 2384 npte |= PG_WIRED; 2385 #ifdef DEBUG 2386 if (pmapdebug & PDB_ENTER) 2387 printf("pmap_enter: new pte = 0x%lx\n", npte); 2388 #endif 2389 2390 /* 2391 * If the HW / PALcode portion of the new PTE is the same as the 2392 * old PTE, no TBI is necessary. 2393 */ 2394 if (opte & PG_V) { 2395 tflush = PG_PALCODE(opte) != PG_PALCODE(npte); 2396 } 2397 2398 /* Set the new PTE. */ 2399 atomic_store_relaxed(pte, npte); 2400 2401 out: 2402 PMAP_MAP_TO_HEAD_UNLOCK(); 2403 2404 /* 2405 * Invalidate the TLB entry for this VA and any appropriate 2406 * caches. 2407 */ 2408 if (tflush) { 2409 /* unlocks pmap */ 2410 pmap_enter_tlb_shootdown(pmap, va, opte, true); 2411 if (pmap == pmap_kernel()) { 2412 TLB_COUNT(reason_enter_kernel); 2413 } else { 2414 TLB_COUNT(reason_enter_user); 2415 } 2416 } else { 2417 PMAP_UNLOCK(pmap); 2418 } 2419 2420 if (opv) 2421 pmap_pv_free(opv); 2422 2423 return error; 2424 } 2425 2426 /* 2427 * pmap_kenter_pa: [ INTERFACE ] 2428 * 2429 * Enter a va -> pa mapping into the kernel pmap without any 2430 * physical->virtual tracking. 2431 * 2432 * Note: no locking is necessary in this function. 2433 */ 2434 void 2435 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 2436 { 2437 pmap_t const pmap = pmap_kernel(); 2438 2439 #ifdef DEBUG 2440 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2441 printf("pmap_kenter_pa(%lx, %lx, %x)\n", 2442 va, pa, prot); 2443 #endif 2444 2445 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2446 2447 pt_entry_t * const pte = PMAP_KERNEL_PTE(va); 2448 2449 /* Build the new PTE. */ 2450 const pt_entry_t npte = 2451 ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) | 2452 PG_V | PG_WIRED; 2453 2454 /* Set the new PTE. */ 2455 const pt_entry_t opte = atomic_load_relaxed(pte); 2456 atomic_store_relaxed(pte, npte); 2457 2458 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1); 2459 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1); 2460 2461 /* 2462 * There should not have been anything here, previously, 2463 * so we can skip TLB shootdowns, etc. in the common case. 2464 */ 2465 if (__predict_false(opte & PG_V)) { 2466 const pt_entry_t diff = npte ^ opte; 2467 2468 printf_nolog("%s: mapping already present\n", __func__); 2469 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 2470 if (diff & PG_WIRED) 2471 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2472 /* XXX Can't handle this case. 
*/ 2473 if (diff & PG_PVLIST) 2474 panic("pmap_kenter_pa: old mapping was managed"); 2475 2476 pmap_enter_tlb_shootdown(pmap_kernel(), va, opte, false); 2477 TLB_COUNT(reason_kenter); 2478 } 2479 } 2480 2481 /* 2482 * pmap_kremove: [ INTERFACE ] 2483 * 2484 * Remove a mapping entered with pmap_kenter_pa() starting at va, 2485 * for size bytes (assumed to be page rounded). 2486 */ 2487 void 2488 pmap_kremove(vaddr_t va, vsize_t size) 2489 { 2490 pt_entry_t *pte, opte; 2491 pmap_t const pmap = pmap_kernel(); 2492 struct pmap_tlb_context tlbctx; 2493 int count = 0; 2494 2495 #ifdef DEBUG 2496 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER)) 2497 printf("pmap_kremove(%lx, %lx)\n", 2498 va, size); 2499 #endif 2500 2501 pmap_tlb_context_init(&tlbctx, 0); 2502 2503 KASSERT(va >= VM_MIN_KERNEL_ADDRESS); 2504 2505 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) { 2506 pte = PMAP_KERNEL_PTE(va); 2507 opte = atomic_load_relaxed(pte); 2508 if (opte & PG_V) { 2509 KASSERT((opte & PG_PVLIST) == 0); 2510 2511 /* Zap the mapping. */ 2512 atomic_store_relaxed(pte, PG_NV); 2513 pmap_tlb_shootdown(pmap, va, opte, &tlbctx); 2514 2515 count++; 2516 } 2517 } 2518 2519 /* Update stats. */ 2520 if (__predict_true(count != 0)) { 2521 PMAP_STAT_DECR(pmap->pm_stats.resident_count, count); 2522 PMAP_STAT_DECR(pmap->pm_stats.wired_count, count); 2523 } 2524 2525 pmap_tlb_shootnow(&tlbctx); 2526 TLB_COUNT(reason_kremove); 2527 } 2528 2529 /* 2530 * pmap_unwire: [ INTERFACE ] 2531 * 2532 * Clear the wired attribute for a map/virtual-address pair. 2533 * 2534 * The mapping must already exist in the pmap. 2535 */ 2536 void 2537 pmap_unwire(pmap_t pmap, vaddr_t va) 2538 { 2539 pt_entry_t *pte; 2540 2541 #ifdef DEBUG 2542 if (pmapdebug & PDB_FOLLOW) 2543 printf("pmap_unwire(%p, %lx)\n", pmap, va); 2544 #endif 2545 2546 PMAP_LOCK(pmap); 2547 2548 pte = pmap_l3pte(pmap_lev1map(pmap), va, NULL); 2549 2550 KASSERT(pte != NULL); 2551 KASSERT(pmap_pte_v(pte)); 2552 2553 /* 2554 * If wiring actually changed (always?) clear the wire bit and 2555 * update the wire count. Note that wiring is not a hardware 2556 * characteristic so there is no need to invalidate the TLB. 2557 */ 2558 if (pmap_pte_w_chg(pte, 0)) { 2559 pmap_pte_set_w(pte, false); 2560 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 2561 } 2562 #ifdef DEBUG 2563 else { 2564 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 2565 "didn't change!\n", pmap, va); 2566 } 2567 #endif 2568 2569 PMAP_UNLOCK(pmap); 2570 } 2571 2572 /* 2573 * pmap_extract: [ INTERFACE ] 2574 * 2575 * Extract the physical address associated with the given 2576 * pmap/virtual address pair. 2577 */ 2578 bool 2579 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap) 2580 { 2581 pt_entry_t *l1pte, *l2pte, *l3pte; 2582 paddr_t pa; 2583 2584 #ifdef DEBUG 2585 if (pmapdebug & PDB_FOLLOW) 2586 printf("pmap_extract(%p, %lx) -> ", pmap, va); 2587 #endif 2588 2589 /* 2590 * Take a faster path for the kernel pmap. Avoids locking, 2591 * handles K0SEG. 
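 *
 *	A typical caller (illustrative sketch only) treats the lookup as
 *	optional and checks the return value:
 *
 *		paddr_t pa;
 *
 *		if (pmap_extract(pmap_kernel(), va, &pa) == false)
 *			... no valid mapping at va ...
 *
 *	For K0SEG addresses the kernel case below reduces to a plain
 *	ALPHA_K0SEG_TO_PHYS() translation inside vtophys_internal().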
2592 */ 2593 if (__predict_true(pmap == pmap_kernel())) { 2594 #ifdef DEBUG 2595 bool address_is_valid = vtophys_internal(va, pap); 2596 if (pmapdebug & PDB_FOLLOW) { 2597 if (address_is_valid) { 2598 printf("0x%lx (kernel vtophys)\n", *pap); 2599 } else { 2600 printf("failed (kernel vtophys)\n"); 2601 } 2602 } 2603 return address_is_valid; 2604 #else 2605 return vtophys_internal(va, pap); 2606 #endif 2607 } 2608 2609 pt_entry_t * const lev1map = pmap_lev1map(pmap); 2610 2611 PMAP_LOCK(pmap); 2612 2613 l1pte = pmap_l1pte(lev1map, va); 2614 if (pmap_pte_v(l1pte) == 0) 2615 goto out; 2616 2617 l2pte = pmap_l2pte(lev1map, va, l1pte); 2618 if (pmap_pte_v(l2pte) == 0) 2619 goto out; 2620 2621 l3pte = pmap_l3pte(lev1map, va, l2pte); 2622 if (pmap_pte_v(l3pte) == 0) 2623 goto out; 2624 2625 pa = pmap_pte_pa(l3pte) | (va & PGOFSET); 2626 PMAP_UNLOCK(pmap); 2627 if (pap != NULL) 2628 *pap = pa; 2629 #ifdef DEBUG 2630 if (pmapdebug & PDB_FOLLOW) 2631 printf("0x%lx\n", pa); 2632 #endif 2633 return (true); 2634 2635 out: 2636 PMAP_UNLOCK(pmap); 2637 #ifdef DEBUG 2638 if (pmapdebug & PDB_FOLLOW) 2639 printf("failed\n"); 2640 #endif 2641 return (false); 2642 } 2643 2644 /* 2645 * pmap_copy: [ INTERFACE ] 2646 * 2647 * Copy the mapping range specified by src_addr/len 2648 * from the source map to the range dst_addr/len 2649 * in the destination map. 2650 * 2651 * This routine is only advisory and need not do anything. 2652 */ 2653 /* call deleted in <machine/pmap.h> */ 2654 2655 /* 2656 * pmap_update: [ INTERFACE ] 2657 * 2658 * Require that all active physical maps contain no 2659 * incorrect entries NOW, by processing any deferred 2660 * pmap operations. 2661 */ 2662 /* call deleted in <machine/pmap.h> */ 2663 2664 /* 2665 * pmap_activate: [ INTERFACE ] 2666 * 2667 * Activate the pmap used by the specified process. This includes 2668 * reloading the MMU context of the current process, and marking 2669 * the pmap in use by the processor. 2670 */ 2671 void 2672 pmap_activate(struct lwp *l) 2673 { 2674 struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap; 2675 struct pcb * const pcb = lwp_getpcb(l); 2676 2677 #ifdef DEBUG 2678 if (pmapdebug & PDB_FOLLOW) 2679 printf("pmap_activate(%p)\n", l); 2680 #endif 2681 2682 KASSERT(kpreempt_disabled()); 2683 2684 struct cpu_info * const ci = curcpu(); 2685 2686 KASSERT(l == ci->ci_curlwp); 2687 2688 u_long const old_ptbr = pcb->pcb_hw.apcb_ptbr; 2689 u_int const old_asn = pcb->pcb_hw.apcb_asn; 2690 2691 /* 2692 * We hold the activation lock to synchronize with TLB shootdown. 2693 * The kernel pmap does not require those tests because shootdowns 2694 * for the kernel pmap are always sent to all CPUs. 2695 */ 2696 if (pmap != pmap_kernel()) { 2697 PMAP_ACT_LOCK(pmap); 2698 pcb->pcb_hw.apcb_asn = pmap_asn_alloc(pmap, ci); 2699 atomic_or_ulong(&pmap->pm_cpus, (1UL << ci->ci_cpuid)); 2700 } else { 2701 pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL; 2702 } 2703 pcb->pcb_hw.apcb_ptbr = 2704 ALPHA_K0SEG_TO_PHYS((vaddr_t)pmap_lev1map(pmap)) >> PGSHIFT; 2705 2706 /* 2707 * Check to see if the ASN or page table base has changed; if 2708 * so, switch to our own context again so that it will take 2709 * effect. 2710 * 2711 * We test ASN first because it's the most likely value to change. 
2712 */ 2713 if (old_asn != pcb->pcb_hw.apcb_asn || 2714 old_ptbr != pcb->pcb_hw.apcb_ptbr) { 2715 if (old_asn != pcb->pcb_hw.apcb_asn && 2716 old_ptbr != pcb->pcb_hw.apcb_ptbr) { 2717 TLB_COUNT(activate_both_change); 2718 } else if (old_asn != pcb->pcb_hw.apcb_asn) { 2719 TLB_COUNT(activate_asn_change); 2720 } else { 2721 TLB_COUNT(activate_ptbr_change); 2722 } 2723 (void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr); 2724 TLB_COUNT(activate_swpctx); 2725 } else { 2726 TLB_COUNT(activate_skip_swpctx); 2727 } 2728 2729 pmap_reference(pmap); 2730 ci->ci_pmap = pmap; 2731 2732 if (pmap != pmap_kernel()) { 2733 PMAP_ACT_UNLOCK(pmap); 2734 } 2735 } 2736 2737 /* 2738 * pmap_deactivate: [ INTERFACE ] 2739 * 2740 * Mark that the pmap used by the specified process is no longer 2741 * in use by the processor. 2742 */ 2743 void 2744 pmap_deactivate(struct lwp *l) 2745 { 2746 struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap; 2747 2748 #ifdef DEBUG 2749 if (pmapdebug & PDB_FOLLOW) 2750 printf("pmap_deactivate(%p)\n", l); 2751 #endif 2752 2753 KASSERT(kpreempt_disabled()); 2754 2755 struct cpu_info * const ci = curcpu(); 2756 2757 KASSERT(l == ci->ci_curlwp); 2758 KASSERT(pmap == ci->ci_pmap); 2759 2760 /* 2761 * There is no need to switch to a different PTBR here, 2762 * because a pmap_activate() or SWPCTX is guaranteed 2763 * before whatever lev1map we're on now is invalidated 2764 * or before user space is accessed again. 2765 * 2766 * Because only kernel mappings will be accessed before the 2767 * next pmap_activate() call, we consider our CPU to be on 2768 * the kernel pmap. 2769 */ 2770 ci->ci_pmap = pmap_kernel(); 2771 KASSERT(atomic_load_relaxed(&pmap->pm_count) > 1); 2772 pmap_destroy(pmap); 2773 } 2774 2775 /* pmap_zero_page() is in pmap_subr.s */ 2776 2777 /* pmap_copy_page() is in pmap_subr.s */ 2778 2779 /* 2780 * pmap_pageidlezero: [ INTERFACE ] 2781 * 2782 * Page zero'er for the idle loop. Returns true if the 2783 * page was zero'd, FALSE if we aborted for some reason. 2784 */ 2785 bool 2786 pmap_pageidlezero(paddr_t pa) 2787 { 2788 u_long *ptr; 2789 int i, cnt = PAGE_SIZE / sizeof(u_long); 2790 2791 for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) { 2792 if (sched_curcpu_runnable_p()) { 2793 /* 2794 * An LWP has become ready. Abort now, 2795 * so we don't keep it waiting while we 2796 * finish zeroing the page. 2797 */ 2798 return (false); 2799 } 2800 *ptr++ = 0; 2801 } 2802 2803 return (true); 2804 } 2805 2806 /* 2807 * pmap_clear_modify: [ INTERFACE ] 2808 * 2809 * Clear the modify bits on the specified physical page. 2810 */ 2811 bool 2812 pmap_clear_modify(struct vm_page *pg) 2813 { 2814 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2815 bool rv = false; 2816 kmutex_t *lock; 2817 struct pmap_tlb_context tlbctx; 2818 2819 #ifdef DEBUG 2820 if (pmapdebug & PDB_FOLLOW) 2821 printf("pmap_clear_modify(%p)\n", pg); 2822 #endif 2823 2824 pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV); 2825 2826 PMAP_HEAD_TO_MAP_LOCK(); 2827 lock = pmap_pvh_lock(pg); 2828 mutex_enter(lock); 2829 2830 if (md->pvh_listx & PGA_MODIFIED) { 2831 rv = true; 2832 pmap_changebit(pg, PG_FOW, ~0UL, &tlbctx); 2833 md->pvh_listx &= ~PGA_MODIFIED; 2834 } 2835 2836 mutex_exit(lock); 2837 PMAP_HEAD_TO_MAP_UNLOCK(); 2838 2839 pmap_tlb_shootnow(&tlbctx); 2840 TLB_COUNT(reason_clear_modify); 2841 2842 return (rv); 2843 } 2844 2845 /* 2846 * pmap_clear_reference: [ INTERFACE ] 2847 * 2848 * Clear the reference bit on the specified physical page. 
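 *
 *	Illustrative sketch (hypothetical caller, not code from this
 *	file): the page daemon's clock hand typically clears the bit and
 *	later checks whether the page has been touched again:
 *
 *		(void) pmap_clear_reference(pg);
 *		... some time later ...
 *		if (pmap_is_referenced(pg))
 *			... page was re-referenced; treat as active ...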
2849 */ 2850 bool 2851 pmap_clear_reference(struct vm_page *pg) 2852 { 2853 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 2854 bool rv = false; 2855 kmutex_t *lock; 2856 struct pmap_tlb_context tlbctx; 2857 2858 #ifdef DEBUG 2859 if (pmapdebug & PDB_FOLLOW) 2860 printf("pmap_clear_reference(%p)\n", pg); 2861 #endif 2862 2863 pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV); 2864 2865 PMAP_HEAD_TO_MAP_LOCK(); 2866 lock = pmap_pvh_lock(pg); 2867 mutex_enter(lock); 2868 2869 if (md->pvh_listx & PGA_REFERENCED) { 2870 rv = true; 2871 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0UL, &tlbctx); 2872 md->pvh_listx &= ~PGA_REFERENCED; 2873 } 2874 2875 mutex_exit(lock); 2876 PMAP_HEAD_TO_MAP_UNLOCK(); 2877 2878 pmap_tlb_shootnow(&tlbctx); 2879 TLB_COUNT(reason_clear_reference); 2880 2881 return (rv); 2882 } 2883 2884 /* 2885 * pmap_is_referenced: [ INTERFACE ] 2886 * 2887 * Return whether or not the specified physical page is referenced 2888 * by any physical maps. 2889 */ 2890 /* See <machine/pmap.h> */ 2891 2892 /* 2893 * pmap_is_modified: [ INTERFACE ] 2894 * 2895 * Return whether or not the specified physical page is modified 2896 * by any physical maps. 2897 */ 2898 /* See <machine/pmap.h> */ 2899 2900 /* 2901 * pmap_phys_address: [ INTERFACE ] 2902 * 2903 * Return the physical address corresponding to the specified 2904 * cookie. Used by the device pager to decode a device driver's 2905 * mmap entry point return value. 2906 * 2907 * Note: no locking is necessary in this function. 2908 */ 2909 paddr_t 2910 pmap_phys_address(paddr_t ppn) 2911 { 2912 2913 return (alpha_ptob(ppn)); 2914 } 2915 2916 /* 2917 * Miscellaneous support routines follow 2918 */ 2919 2920 /* 2921 * alpha_protection_init: 2922 * 2923 * Initialize Alpha protection code array. 2924 * 2925 * Note: no locking is necessary in this function. 2926 */ 2927 static void 2928 alpha_protection_init(void) 2929 { 2930 int prot, *kp, *up; 2931 2932 kp = protection_codes[0]; 2933 up = protection_codes[1]; 2934 2935 for (prot = 0; prot < 8; prot++) { 2936 kp[prot] = PG_ASM; 2937 up[prot] = 0; 2938 2939 if (prot & VM_PROT_READ) { 2940 kp[prot] |= PG_KRE; 2941 up[prot] |= PG_KRE | PG_URE; 2942 } 2943 if (prot & VM_PROT_WRITE) { 2944 kp[prot] |= PG_KWE; 2945 up[prot] |= PG_KWE | PG_UWE; 2946 } 2947 if (prot & VM_PROT_EXECUTE) { 2948 kp[prot] |= PG_EXEC | PG_KRE; 2949 up[prot] |= PG_EXEC | PG_KRE | PG_URE; 2950 } else { 2951 kp[prot] |= PG_FOE; 2952 up[prot] |= PG_FOE; 2953 } 2954 } 2955 } 2956 2957 /* 2958 * pmap_remove_mapping: 2959 * 2960 * Invalidate a single page denoted by pmap/va. 2961 * 2962 * If (pte != NULL), it is the already computed PTE for the page. 2963 * 2964 * Note: locking in this function is complicated by the fact 2965 * that it can be called when the PV list is already locked. 2966 * (pmap_page_protect()). In this case, the caller must be 2967 * careful to get the next PV entry while we remove this entry 2968 * from beneath it. We assume that the pmap itself is already 2969 * locked; dolock applies only to the PV list. 2970 * 2971 * Returns important PTE bits that the caller needs to check for 2972 * TLB / I-stream invalidation purposes. 
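 *
 *	Callers feed the returned bits to the TLB shootdown machinery;
 *	for illustration, the pattern used by pmap_remove_internal()
 *	above is
 *
 *		pte_bits = pmap_remove_mapping(pmap, sva, l3pte, true,
 *		    NULL, tlbctx);
 *		pmap_tlb_shootdown(pmap, sva, pte_bits, tlbctx);
 *
 *	PG_ASM in the result means the invalidation must ignore the ASN,
 *	and PG_EXEC means an I-stream sync is also required.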
2973 */ 2974 static pt_entry_t 2975 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte, 2976 bool dolock, pv_entry_t *opvp, struct pmap_tlb_context * const tlbctx) 2977 { 2978 pt_entry_t opte; 2979 paddr_t pa; 2980 struct vm_page *pg; /* if != NULL, page is managed */ 2981 2982 #ifdef DEBUG 2983 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT)) 2984 printf("pmap_remove_mapping(%p, %lx, %p, %d, %p, %p)\n", 2985 pmap, va, pte, dolock, opvp, tlbctx); 2986 #endif 2987 2988 /* 2989 * PTE not provided, compute it from pmap and va. 2990 */ 2991 if (pte == NULL) { 2992 pte = pmap_l3pte(pmap_lev1map(pmap), va, NULL); 2993 if (pmap_pte_v(pte) == 0) 2994 return 0; 2995 } 2996 2997 opte = *pte; 2998 2999 pa = PG_PFNUM(opte) << PGSHIFT; 3000 3001 /* 3002 * Update statistics 3003 */ 3004 if (pmap_pte_w(pte)) 3005 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1); 3006 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1); 3007 3008 /* 3009 * Invalidate the PTE after saving the reference modify info. 3010 */ 3011 #ifdef DEBUG 3012 if (pmapdebug & PDB_REMOVE) 3013 printf("remove: invalidating pte at %p\n", pte); 3014 #endif 3015 atomic_store_relaxed(pte, PG_NV); 3016 3017 /* 3018 * If we're removing a user mapping, check to see if we 3019 * can free page table pages. 3020 */ 3021 if (pmap != pmap_kernel()) { 3022 /* 3023 * Delete the reference on the level 3 table. It will 3024 * delete references on the level 2 and 1 tables as 3025 * appropriate. 3026 */ 3027 pmap_l3pt_delref(pmap, va, pte, tlbctx); 3028 } 3029 3030 if (opte & PG_PVLIST) { 3031 /* 3032 * Remove it from the PV table. 3033 */ 3034 pg = PHYS_TO_VM_PAGE(pa); 3035 KASSERT(pg != NULL); 3036 pmap_pv_remove(pmap, pg, va, dolock, opvp, tlbctx); 3037 KASSERT(opvp == NULL || *opvp != NULL); 3038 } 3039 3040 return opte & (PG_V | PG_ASM | PG_EXEC); 3041 } 3042 3043 /* 3044 * pmap_changebit: 3045 * 3046 * Set or clear the specified PTE bits for all mappings on the 3047 * specified page. 3048 * 3049 * Note: we assume that the pv_head is already locked, and that 3050 * the caller has acquired a PV->pmap mutex so that we can lock 3051 * the pmaps as we encounter them. 3052 */ 3053 static void 3054 pmap_changebit(struct vm_page *pg, pt_entry_t set, pt_entry_t mask, 3055 struct pmap_tlb_context * const tlbctx) 3056 { 3057 pv_entry_t pv; 3058 pt_entry_t *pte, npte, opte; 3059 3060 #ifdef DEBUG 3061 if (pmapdebug & PDB_BITS) 3062 printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n", 3063 pg, set, mask); 3064 #endif 3065 3066 /* 3067 * Loop over all current mappings setting/clearing as apropos. 3068 */ 3069 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 3070 PMAP_LOCK(pv->pv_pmap); 3071 3072 pte = pv->pv_pte; 3073 3074 opte = atomic_load_relaxed(pte); 3075 npte = (opte | set) & mask; 3076 if (npte != opte) { 3077 atomic_store_relaxed(pte, npte); 3078 pmap_tlb_shootdown_pv(pv->pv_pmap, pv->pv_va, 3079 opte, tlbctx); 3080 } 3081 PMAP_UNLOCK(pv->pv_pmap); 3082 } 3083 } 3084 3085 /* 3086 * pmap_emulate_reference: 3087 * 3088 * Emulate reference and/or modified bit hits. 3089 * Return 1 if this was an execute fault on a non-exec mapping, 3090 * otherwise return 0. 
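 *
 *	Illustrative sketch only (the real call site lives in the trap
 *	handler, not in this file): an execute-fault MMCSR code would be
 *	forwarded here roughly as
 *
 *		if (pmap_emulate_reference(l, va, user, ALPHA_MMCSR_FOE))
 *			... not an emulation case; treat as a genuine
 *			    protection fault ...
 *
 *	while the read and write variants (ALPHA_MMCSR_FOR and
 *	ALPHA_MMCSR_FOW) always return 0 after the page's attributes
 *	have been updated.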
3091 */ 3092 int 3093 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type) 3094 { 3095 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 3096 pt_entry_t faultoff, *pte; 3097 struct vm_page *pg; 3098 paddr_t pa; 3099 bool didlock = false; 3100 bool exec = false; 3101 kmutex_t *lock; 3102 3103 #ifdef DEBUG 3104 if (pmapdebug & PDB_FOLLOW) 3105 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n", 3106 l, v, user, type); 3107 #endif 3108 3109 /* 3110 * Convert process and virtual address to physical address. 3111 */ 3112 if (v >= VM_MIN_KERNEL_ADDRESS) { 3113 if (user) 3114 panic("pmap_emulate_reference: user ref to kernel"); 3115 /* 3116 * No need to lock here; kernel PT pages never go away. 3117 */ 3118 pte = PMAP_KERNEL_PTE(v); 3119 } else { 3120 #ifdef DIAGNOSTIC 3121 if (l == NULL) 3122 panic("pmap_emulate_reference: bad proc"); 3123 if (l->l_proc->p_vmspace == NULL) 3124 panic("pmap_emulate_reference: bad p_vmspace"); 3125 #endif 3126 PMAP_LOCK(pmap); 3127 didlock = true; 3128 pte = pmap_l3pte(pmap_lev1map(pmap), v, NULL); 3129 /* 3130 * We'll unlock below where we're done with the PTE. 3131 */ 3132 } 3133 exec = pmap_pte_exec(pte); 3134 if (!exec && type == ALPHA_MMCSR_FOE) { 3135 if (didlock) 3136 PMAP_UNLOCK(pmap); 3137 return (1); 3138 } 3139 #ifdef DEBUG 3140 if (pmapdebug & PDB_FOLLOW) { 3141 printf("\tpte = %p, ", pte); 3142 printf("*pte = 0x%lx\n", *pte); 3143 } 3144 #endif 3145 3146 pa = pmap_pte_pa(pte); 3147 3148 /* 3149 * We're now done with the PTE. If it was a user pmap, unlock 3150 * it now. 3151 */ 3152 if (didlock) 3153 PMAP_UNLOCK(pmap); 3154 3155 #ifdef DEBUG 3156 if (pmapdebug & PDB_FOLLOW) 3157 printf("\tpa = 0x%lx\n", pa); 3158 #endif 3159 #ifdef DIAGNOSTIC 3160 if (!uvm_pageismanaged(pa)) 3161 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): " 3162 "pa 0x%lx not managed", l, v, user, type, pa); 3163 #endif 3164 3165 /* 3166 * Twiddle the appropriate bits to reflect the reference 3167 * and/or modification.. 3168 * 3169 * The rules: 3170 * (1) always mark page as used, and 3171 * (2) if it was a write fault, mark page as modified. 3172 */ 3173 pg = PHYS_TO_VM_PAGE(pa); 3174 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3175 struct pmap_tlb_context tlbctx; 3176 3177 pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV); 3178 3179 PMAP_HEAD_TO_MAP_LOCK(); 3180 lock = pmap_pvh_lock(pg); 3181 mutex_enter(lock); 3182 3183 if (type == ALPHA_MMCSR_FOW) { 3184 md->pvh_listx |= (PGA_REFERENCED|PGA_MODIFIED); 3185 faultoff = PG_FOR | PG_FOW; 3186 } else { 3187 md->pvh_listx |= PGA_REFERENCED; 3188 faultoff = PG_FOR; 3189 if (exec) { 3190 faultoff |= PG_FOE; 3191 } 3192 } 3193 pmap_changebit(pg, 0, ~faultoff, &tlbctx); 3194 3195 mutex_exit(lock); 3196 PMAP_HEAD_TO_MAP_UNLOCK(); 3197 3198 pmap_tlb_shootnow(&tlbctx); 3199 TLB_COUNT(reason_emulate_reference); 3200 3201 return (0); 3202 } 3203 3204 #ifdef DEBUG 3205 /* 3206 * pmap_pv_dump: 3207 * 3208 * Dump the physical->virtual data for the specified page. 
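 *
 *	Debug-only helper; the intended use (a sketch) is invoking it by
 *	hand, e.g. from DDB's "call" command, with a physical address of
 *	interest:
 *
 *		call pmap_pv_dump(0x200000)	(address made up here)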
3209 */ 3210 void 3211 pmap_pv_dump(paddr_t pa) 3212 { 3213 struct vm_page *pg; 3214 struct vm_page_md *md; 3215 pv_entry_t pv; 3216 kmutex_t *lock; 3217 3218 pg = PHYS_TO_VM_PAGE(pa); 3219 md = VM_PAGE_TO_MD(pg); 3220 3221 lock = pmap_pvh_lock(pg); 3222 mutex_enter(lock); 3223 3224 printf("pa 0x%lx (attrs = 0x%lx):\n", pa, md->pvh_listx & PGA_ATTRS); 3225 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) 3226 printf(" pmap %p, va 0x%lx\n", 3227 pv->pv_pmap, pv->pv_va); 3228 printf("\n"); 3229 3230 mutex_exit(lock); 3231 } 3232 #endif 3233 3234 /* 3235 * vtophys: 3236 * 3237 * Return the physical address corresponding to the K0SEG or 3238 * K1SEG address provided. 3239 * 3240 * Note: no locking is necessary in this function. 3241 */ 3242 static bool 3243 vtophys_internal(vaddr_t const vaddr, paddr_t * const pap) 3244 { 3245 paddr_t pa; 3246 3247 KASSERT(vaddr >= ALPHA_K0SEG_BASE); 3248 3249 if (vaddr <= ALPHA_K0SEG_END) { 3250 pa = ALPHA_K0SEG_TO_PHYS(vaddr); 3251 } else { 3252 pt_entry_t * const pte = PMAP_KERNEL_PTE(vaddr); 3253 if (__predict_false(! pmap_pte_v(pte))) { 3254 return false; 3255 } 3256 pa = pmap_pte_pa(pte) | (vaddr & PGOFSET); 3257 } 3258 3259 if (pap != NULL) { 3260 *pap = pa; 3261 } 3262 3263 return true; 3264 } 3265 3266 paddr_t 3267 vtophys(vaddr_t const vaddr) 3268 { 3269 paddr_t pa; 3270 3271 if (__predict_false(! vtophys_internal(vaddr, &pa))) 3272 pa = 0; 3273 return pa; 3274 } 3275 3276 /******************** pv_entry management ********************/ 3277 3278 /* 3279 * pmap_pv_enter: 3280 * 3281 * Add a physical->virtual entry to the pv_table. 3282 */ 3283 static int 3284 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte, 3285 bool dolock, pv_entry_t newpv) 3286 { 3287 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3288 kmutex_t *lock; 3289 3290 /* 3291 * Allocate and fill in the new pv_entry. 3292 */ 3293 if (newpv == NULL) { 3294 newpv = pmap_pv_alloc(); 3295 if (newpv == NULL) 3296 return ENOMEM; 3297 } 3298 newpv->pv_va = va; 3299 newpv->pv_pmap = pmap; 3300 newpv->pv_pte = pte; 3301 3302 if (dolock) { 3303 lock = pmap_pvh_lock(pg); 3304 mutex_enter(lock); 3305 } 3306 3307 #ifdef DEBUG 3308 { 3309 pv_entry_t pv; 3310 /* 3311 * Make sure the entry doesn't already exist. 3312 */ 3313 for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) { 3314 if (pmap == pv->pv_pmap && va == pv->pv_va) { 3315 printf("pmap = %p, va = 0x%lx\n", pmap, va); 3316 panic("pmap_pv_enter: already in pv table"); 3317 } 3318 } 3319 } 3320 #endif 3321 3322 /* 3323 * ...and put it in the list. 3324 */ 3325 uintptr_t const attrs = md->pvh_listx & PGA_ATTRS; 3326 newpv->pv_next = (struct pv_entry *)(md->pvh_listx & ~PGA_ATTRS); 3327 md->pvh_listx = (uintptr_t)newpv | attrs; 3328 LIST_INSERT_HEAD(&pmap->pm_pvents, newpv, pv_link); 3329 3330 if (dolock) { 3331 mutex_exit(lock); 3332 } 3333 3334 return 0; 3335 } 3336 3337 /* 3338 * pmap_pv_remove: 3339 * 3340 * Remove a physical->virtual entry from the pv_table. 3341 */ 3342 static void 3343 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock, 3344 pv_entry_t *opvp, struct pmap_tlb_context * const tlbctx) 3345 { 3346 struct vm_page_md * const md = VM_PAGE_TO_MD(pg); 3347 pv_entry_t pv, *pvp; 3348 kmutex_t *lock; 3349 3350 if (dolock) { 3351 lock = pmap_pvh_lock(pg); 3352 mutex_enter(lock); 3353 } else { 3354 lock = NULL; /* XXX stupid gcc */ 3355 } 3356 3357 /* 3358 * Find the entry to remove. 
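 *
 * (Illustrative note: the list head md->pvh_listx packs the pointer to
 *  the first pv_entry together with the page attribute bits in its low
 *  bits, roughly
 *
 *	pvh_listx == (uintptr_t)first_pv | (attrs & PGA_ATTRS)
 *
 *  so the walk below starts with pvp aimed at the head word itself, and
 *  the unlink further down must preserve those low bits explicitly.)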
3359 */ 3360 for (pvp = (struct pv_entry **)&md->pvh_listx, pv = VM_MDPAGE_PVS(pg); 3361 pv != NULL; pvp = &pv->pv_next, pv = *pvp) 3362 if (pmap == pv->pv_pmap && va == pv->pv_va) 3363 break; 3364 3365 KASSERT(pv != NULL); 3366 3367 /* 3368 * The page attributes are in the lower 2 bits of the first 3369 * PV entry pointer. Rather than comparing the pointer address 3370 * and branching, we just always preserve what might be there 3371 * (either attribute bits or zero bits). 3372 */ 3373 *pvp = (pv_entry_t)((uintptr_t)pv->pv_next | 3374 (((uintptr_t)*pvp) & PGA_ATTRS)); 3375 LIST_REMOVE(pv, pv_link); 3376 3377 if (dolock) { 3378 mutex_exit(lock); 3379 } 3380 3381 if (opvp != NULL) { 3382 *opvp = pv; 3383 } else { 3384 KASSERT(tlbctx != NULL); 3385 LIST_INSERT_HEAD(&tlbctx->t_freepvq, pv, pv_link); 3386 } 3387 } 3388 3389 /* 3390 * pmap_pv_page_alloc: 3391 * 3392 * Allocate a page for the pv_entry pool. 3393 */ 3394 static void * 3395 pmap_pv_page_alloc(struct pool *pp, int flags) 3396 { 3397 struct vm_page * const pg = pmap_physpage_alloc(PGU_PVENT); 3398 if (__predict_false(pg == NULL)) { 3399 return NULL; 3400 } 3401 return (void *)ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(pg)); 3402 } 3403 3404 /* 3405 * pmap_pv_page_free: 3406 * 3407 * Free a pv_entry pool page. 3408 */ 3409 static void 3410 pmap_pv_page_free(struct pool *pp, void *v) 3411 { 3412 3413 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v)); 3414 } 3415 3416 /******************** misc. functions ********************/ 3417 3418 /* 3419 * pmap_physpage_alloc: 3420 * 3421 * Allocate a single page from the VM system and return the 3422 * physical address for that page. 3423 */ 3424 static struct vm_page * 3425 pmap_physpage_alloc(int usage) 3426 { 3427 struct vm_page *pg; 3428 3429 /* 3430 * Don't ask for a zero'd page in the L1PT case -- we will 3431 * properly initialize it in the constructor. 3432 */ 3433 3434 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ? 3435 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO); 3436 if (pg != NULL) { 3437 KASSERT(PHYSPAGE_REFCNT(pg) == 0); 3438 } 3439 return pg; 3440 } 3441 3442 /* 3443 * pmap_physpage_free: 3444 * 3445 * Free the single page table page at the specified physical address. 3446 */ 3447 static void 3448 pmap_physpage_free(paddr_t pa) 3449 { 3450 struct vm_page *pg; 3451 3452 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) 3453 panic("pmap_physpage_free: bogus physical page address"); 3454 3455 KASSERT(PHYSPAGE_REFCNT(pg) == 0); 3456 3457 uvm_pagefree(pg); 3458 } 3459 3460 /* 3461 * pmap_physpage_addref: 3462 * 3463 * Add a reference to the specified special use page. 3464 */ 3465 static int 3466 pmap_physpage_addref(void *kva) 3467 { 3468 struct vm_page *pg; 3469 paddr_t pa; 3470 3471 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 3472 pg = PHYS_TO_VM_PAGE(pa); 3473 3474 KASSERT(PHYSPAGE_REFCNT(pg) < UINT32_MAX); 3475 3476 return PHYSPAGE_REFCNT_INC(pg); 3477 } 3478 3479 /* 3480 * pmap_physpage_delref: 3481 * 3482 * Delete a reference to the specified special use page. 
3483 */ 3484 static int 3485 pmap_physpage_delref(void *kva) 3486 { 3487 struct vm_page *pg; 3488 paddr_t pa; 3489 3490 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva)); 3491 pg = PHYS_TO_VM_PAGE(pa); 3492 3493 KASSERT(PHYSPAGE_REFCNT(pg) != 0); 3494 3495 return PHYSPAGE_REFCNT_DEC(pg); 3496 } 3497 3498 /******************** page table page management ********************/ 3499 3500 static bool 3501 pmap_kptpage_alloc(paddr_t *pap) 3502 { 3503 if (uvm.page_init_done == false) { 3504 /* 3505 * We're growing the kernel pmap early (from 3506 * uvm_pageboot_alloc()). This case must 3507 * be handled a little differently. 3508 */ 3509 *pap = ALPHA_K0SEG_TO_PHYS( 3510 pmap_steal_memory(PAGE_SIZE, NULL, NULL)); 3511 return true; 3512 } 3513 3514 struct vm_page * const pg = pmap_physpage_alloc(PGU_NORMAL); 3515 if (__predict_true(pg != NULL)) { 3516 *pap = VM_PAGE_TO_PHYS(pg); 3517 return true; 3518 } 3519 return false; 3520 } 3521 3522 /* 3523 * pmap_growkernel: [ INTERFACE ] 3524 * 3525 * Grow the kernel address space. This is a hint from the 3526 * upper layer to pre-allocate more kernel PT pages. 3527 */ 3528 vaddr_t 3529 pmap_growkernel(vaddr_t maxkvaddr) 3530 { 3531 struct pmap *pm; 3532 paddr_t ptaddr; 3533 pt_entry_t *l1pte, *l2pte, pte; 3534 pt_entry_t *lev1map; 3535 vaddr_t va; 3536 int l1idx; 3537 3538 rw_enter(&pmap_growkernel_lock, RW_WRITER); 3539 3540 if (maxkvaddr <= virtual_end) 3541 goto out; /* we are OK */ 3542 3543 pmap_growkernel_evcnt.ev_count++; 3544 3545 va = virtual_end; 3546 3547 while (va < maxkvaddr) { 3548 /* 3549 * If there is no valid L1 PTE (i.e. no L2 PT page), 3550 * allocate a new L2 PT page and insert it into the 3551 * L1 map. 3552 */ 3553 l1pte = pmap_l1pte(kernel_lev1map, va); 3554 if (pmap_pte_v(l1pte) == 0) { 3555 if (!pmap_kptpage_alloc(&ptaddr)) 3556 goto die; 3557 pte = (atop(ptaddr) << PG_SHIFT) | 3558 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3559 *l1pte = pte; 3560 3561 l1idx = l1pte_index(va); 3562 3563 /* Update all the user pmaps. */ 3564 mutex_enter(&pmap_all_pmaps_lock); 3565 for (pm = TAILQ_FIRST(&pmap_all_pmaps); 3566 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) { 3567 /* Skip the kernel pmap. */ 3568 if (pm == pmap_kernel()) 3569 continue; 3570 3571 /* 3572 * Any pmaps published on the global list 3573 * should never be referencing kernel_lev1map. 3574 */ 3575 lev1map = pmap_lev1map(pm); 3576 KASSERT(lev1map != kernel_lev1map); 3577 3578 PMAP_LOCK(pm); 3579 lev1map[l1idx] = pte; 3580 PMAP_UNLOCK(pm); 3581 } 3582 mutex_exit(&pmap_all_pmaps_lock); 3583 } 3584 3585 /* 3586 * Have an L2 PT page now, add the L3 PT page. 3587 */ 3588 l2pte = pmap_l2pte(kernel_lev1map, va, l1pte); 3589 KASSERT(pmap_pte_v(l2pte) == 0); 3590 if (!pmap_kptpage_alloc(&ptaddr)) 3591 goto die; 3592 *l2pte = (atop(ptaddr) << PG_SHIFT) | 3593 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED; 3594 va += ALPHA_L2SEG_SIZE; 3595 } 3596 3597 /* Invalidate the L1 PT cache. */ 3598 pool_cache_invalidate(&pmap_l1pt_cache); 3599 3600 virtual_end = va; 3601 3602 out: 3603 rw_exit(&pmap_growkernel_lock); 3604 3605 return (virtual_end); 3606 3607 die: 3608 panic("pmap_growkernel: out of memory"); 3609 } 3610 3611 /* 3612 * pmap_l1pt_ctor: 3613 * 3614 * Pool cache constructor for L1 PT pages. 3615 * 3616 * Note: The growkernel lock is held across allocations 3617 * from our pool_cache, so we don't need to acquire it 3618 * ourselves. 
3619 */ 3620 static int 3621 pmap_l1pt_ctor(void *arg, void *object, int flags) 3622 { 3623 pt_entry_t *l1pt = object, pte; 3624 int i; 3625 3626 /* 3627 * Initialize the new level 1 table by zeroing the 3628 * user portion and copying the kernel mappings into 3629 * the kernel portion. 3630 */ 3631 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++) 3632 l1pt[i] = 0; 3633 3634 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS); 3635 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++) 3636 l1pt[i] = kernel_lev1map[i]; 3637 3638 /* 3639 * Now, map the new virtual page table. NOTE: NO ASM! 3640 */ 3641 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) | 3642 PG_V | PG_KRE | PG_KWE; 3643 l1pt[l1pte_index(VPTBASE)] = pte; 3644 3645 return (0); 3646 } 3647 3648 /* 3649 * pmap_l1pt_alloc: 3650 * 3651 * Page allocator for L1 PT pages. 3652 */ 3653 static void * 3654 pmap_l1pt_alloc(struct pool *pp, int flags) 3655 { 3656 /* 3657 * Attempt to allocate a free page. 3658 */ 3659 struct vm_page * const pg = pmap_physpage_alloc(PGU_L1PT); 3660 if (__predict_false(pg == NULL)) { 3661 return NULL; 3662 } 3663 return (void *)ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(pg)); 3664 } 3665 3666 /* 3667 * pmap_l1pt_free: 3668 * 3669 * Page freer for L1 PT pages. 3670 */ 3671 static void 3672 pmap_l1pt_free(struct pool *pp, void *v) 3673 { 3674 3675 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v)); 3676 } 3677 3678 /* 3679 * pmap_ptpage_alloc: 3680 * 3681 * Allocate a level 2 or level 3 page table page for a user 3682 * pmap, and initialize the PTE that references it. 3683 * 3684 * Note: the pmap must already be locked. 3685 */ 3686 static int 3687 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t * const pte, int const usage) 3688 { 3689 /* 3690 * Allocate the page table page. 3691 */ 3692 struct vm_page * const pg = pmap_physpage_alloc(usage); 3693 if (__predict_false(pg == NULL)) { 3694 return ENOMEM; 3695 } 3696 3697 LIST_INSERT_HEAD(&pmap->pm_ptpages, pg, pageq.list); 3698 3699 /* 3700 * Initialize the referencing PTE. 3701 */ 3702 const pt_entry_t npte = ((VM_PAGE_TO_PHYS(pg) >> PGSHIFT) << PG_SHIFT) | 3703 PG_V | PG_KRE | PG_KWE | PG_WIRED; 3704 3705 atomic_store_relaxed(pte, npte); 3706 3707 return (0); 3708 } 3709 3710 /* 3711 * pmap_ptpage_free: 3712 * 3713 * Free the level 2 or level 3 page table page referenced 3714 * be the provided PTE. 3715 * 3716 * Note: the pmap must already be locked. 3717 */ 3718 static void 3719 pmap_ptpage_free(pmap_t pmap, pt_entry_t * const pte, 3720 struct pmap_tlb_context * const tlbctx) 3721 { 3722 3723 /* 3724 * Extract the physical address of the page from the PTE 3725 * and clear the entry. 3726 */ 3727 const paddr_t ptpa = pmap_pte_pa(pte); 3728 atomic_store_relaxed(pte, PG_NV); 3729 3730 struct vm_page * const pg = PHYS_TO_VM_PAGE(ptpa); 3731 KASSERT(pg != NULL); 3732 3733 KASSERT(PHYSPAGE_REFCNT(pg) == 0); 3734 #ifdef DEBUG 3735 pmap_zero_page(ptpa); 3736 #endif 3737 3738 LIST_REMOVE(pg, pageq.list); 3739 LIST_INSERT_HEAD(&tlbctx->t_freeptq, pg, pageq.list); 3740 } 3741 3742 /* 3743 * pmap_l3pt_delref: 3744 * 3745 * Delete a reference on a level 3 PT page. If the reference drops 3746 * to zero, free it. 3747 * 3748 * Note: the pmap must already be locked. 
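 *
 *	Illustrative summary of the reference-count cascade (a sketch,
 *	not additional code): dropping the last mapping in an L3 page
 *	propagates upward,
 *
 *		pmap_l3pt_delref()   frees the L3 PT page at refcnt 0,
 *		  pmap_l2pt_delref()   then frees the L2 PT page at refcnt 0,
 *		    pmap_l1pt_delref()   then drops the L1 reference only,
 *
 *	with each freed PT page queued on the TLB context rather than
 *	handed back to UVM immediately, so the VPT shootdown can happen
 *	first.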
3749 */ 3750 static void 3751 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, 3752 struct pmap_tlb_context * const tlbctx) 3753 { 3754 pt_entry_t *l1pte, *l2pte; 3755 pt_entry_t * const lev1map = pmap_lev1map(pmap); 3756 3757 l1pte = pmap_l1pte(lev1map, va); 3758 l2pte = pmap_l2pte(lev1map, va, l1pte); 3759 3760 #ifdef DIAGNOSTIC 3761 if (pmap == pmap_kernel()) 3762 panic("pmap_l3pt_delref: kernel pmap"); 3763 #endif 3764 3765 if (pmap_physpage_delref(l3pte) == 0) { 3766 /* 3767 * No more mappings; we can free the level 3 table. 3768 */ 3769 #ifdef DEBUG 3770 if (pmapdebug & PDB_PTPAGE) 3771 printf("pmap_l3pt_delref: freeing level 3 table at " 3772 "0x%lx\n", pmap_pte_pa(l2pte)); 3773 #endif 3774 /* 3775 * You can pass NULL if you know the last reference won't 3776 * be dropped. 3777 */ 3778 KASSERT(tlbctx != NULL); 3779 pmap_ptpage_free(pmap, l2pte, tlbctx); 3780 3781 /* 3782 * We've freed a level 3 table, so we must invalidate 3783 * any now-stale TLB entries for the corresponding VPT 3784 * VA range. Easiest way to guarantee this is to hit 3785 * all of the user TLB entries. 3786 */ 3787 pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx); 3788 3789 /* 3790 * We've freed a level 3 table, so delete the reference 3791 * on the level 2 table. 3792 */ 3793 pmap_l2pt_delref(pmap, l1pte, l2pte, tlbctx); 3794 } 3795 } 3796 3797 /* 3798 * pmap_l2pt_delref: 3799 * 3800 * Delete a reference on a level 2 PT page. If the reference drops 3801 * to zero, free it. 3802 * 3803 * Note: the pmap must already be locked. 3804 */ 3805 static void 3806 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte, 3807 struct pmap_tlb_context * const tlbctx) 3808 { 3809 3810 #ifdef DIAGNOSTIC 3811 if (pmap == pmap_kernel()) 3812 panic("pmap_l2pt_delref: kernel pmap"); 3813 #endif 3814 3815 if (pmap_physpage_delref(l2pte) == 0) { 3816 /* 3817 * No more mappings in this segment; we can free the 3818 * level 2 table. 3819 */ 3820 #ifdef DEBUG 3821 if (pmapdebug & PDB_PTPAGE) 3822 printf("pmap_l2pt_delref: freeing level 2 table at " 3823 "0x%lx\n", pmap_pte_pa(l1pte)); 3824 #endif 3825 /* 3826 * You can pass NULL if you know the last reference won't 3827 * be dropped. 3828 */ 3829 KASSERT(tlbctx != NULL); 3830 pmap_ptpage_free(pmap, l1pte, tlbctx); 3831 3832 /* 3833 * We've freed a level 2 table, so we must invalidate 3834 * any now-stale TLB entries for the corresponding VPT 3835 * VA range. Easiest way to guarantee this is to hit 3836 * all of the user TLB entries. 3837 */ 3838 pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx); 3839 3840 /* 3841 * We've freed a level 2 table, so delete the reference 3842 * on the level 1 table. 3843 */ 3844 pmap_l1pt_delref(pmap, l1pte); 3845 } 3846 } 3847 3848 /* 3849 * pmap_l1pt_delref: 3850 * 3851 * Delete a reference on a level 1 PT page. 3852 */ 3853 static void 3854 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte) 3855 { 3856 3857 KASSERT(pmap != pmap_kernel()); 3858 3859 (void)pmap_physpage_delref(l1pte); 3860 } 3861 3862 /******************** Address Space Number management ********************/ 3863 3864 /* 3865 * pmap_asn_alloc: 3866 * 3867 * Allocate and assign an ASN to the specified pmap. 3868 * 3869 * Note: the pmap must already be locked. This may be called from 3870 * an interprocessor interrupt, and in that case, the sender of 3871 * the IPI has the pmap lock. 
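 *
 *	A representative call site is pmap_activate() above, which loads
 *	the result into the PCB before switching context; as a sketch:
 *
 *		pcb->pcb_hw.apcb_asn = pmap_asn_alloc(pmap, ci);
 *
 *	On CPUs without ASNs, pmap_max_asn is 0 and this simply
 *	returns 0.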
3872 */
3873 static u_int
3874 pmap_asn_alloc(pmap_t const pmap, struct cpu_info * const ci)
3875 {
3876 
3877 #ifdef DEBUG
3878 if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3879 printf("pmap_asn_alloc(%p)\n", pmap);
3880 #endif
3881 
3882 KASSERT(pmap != pmap_kernel());
3883 KASSERT(pmap->pm_percpu[ci->ci_cpuid].pmc_lev1map != kernel_lev1map);
3884 KASSERT(kpreempt_disabled());
3885 
3886 /* No work to do if the CPU does not implement ASNs. */
3887 if (pmap_max_asn == 0)
3888 return 0;
3889 
3890 struct pmap_percpu * const pmc = &pmap->pm_percpu[ci->ci_cpuid];
3891 
3892 /*
3893 * Hopefully, we can continue using the one we have...
3894 *
3895 * N.B. the generation check will fail the first time
3896 * any pmap is activated on a given CPU, because we start
3897 * the generation counter at 1, but initialize pmaps with
3898 * 0; this forces the first ASN allocation to occur.
3899 */
3900 if (pmc->pmc_asngen == ci->ci_asn_gen) {
3901 #ifdef DEBUG
3902 if (pmapdebug & PDB_ASN)
3903 printf("pmap_asn_alloc: same generation, keeping %u\n",
3904 pmc->pmc_asn);
3905 #endif
3906 TLB_COUNT(asn_reuse);
3907 return pmc->pmc_asn;
3908 }
3909 
3910 /*
3911 * Need to assign a new ASN. Grab the next one, incrementing
3912 * the generation number if we have to.
3913 */
3914 if (ci->ci_next_asn > pmap_max_asn) {
3915 /*
3916 * Invalidate all non-PG_ASM TLB entries and the
3917 * I-cache, and bump the generation number.
3918 */
3919 ALPHA_TBIAP();
3920 alpha_pal_imb();
3921 
3922 ci->ci_next_asn = PMAP_ASN_FIRST_USER;
3923 ci->ci_asn_gen++;
3924 TLB_COUNT(asn_newgen);
3925 
3926 /*
3927 * Make sure the generation number doesn't wrap. We could
3928 * handle this scenario by traversing all of the pmaps,
3929 * and invalidating the generation number on those which
3930 * are not currently in use by this processor.
3931 *
3932 * However... considering that we're using an unsigned 64-bit
3933 * integer for generation numbers, we won't wrap for
3934 * approximately 75 billion years on a 128-ASN CPU
3935 * (assuming 1000 switch operations per second).
3936 *
3937 * So, we don't bother.
3938 */
3939 KASSERT(ci->ci_asn_gen != PMAP_ASNGEN_INVALID);
3940 #ifdef DEBUG
3941 if (pmapdebug & PDB_ASN)
3942 printf("pmap_asn_alloc: generation bumped to %lu\n",
3943 ci->ci_asn_gen);
3944 #endif
3945 }
3946 
3947 /*
3948 * Assign the new ASN and validate the generation number.
3949 */
3950 pmc->pmc_asn = ci->ci_next_asn++;
3951 pmc->pmc_asngen = ci->ci_asn_gen;
3952 TLB_COUNT(asn_assign);
3953 
3954 /*
3955 * We have a new ASN, so we can skip any pending I-stream sync
3956 * on the way back out to user space.
3957 */
3958 pmc->pmc_needisync = 0;
3959 
3960 #ifdef DEBUG
3961 if (pmapdebug & PDB_ASN)
3962 printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3963 pmc->pmc_asn, pmap);
3964 #endif
3965 return pmc->pmc_asn;
3966 }
3967 
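/*
 * Example (illustrative only, not part of the original sources): the
 * usual life cycle of a user pmap as driven by the machine-independent
 * VM code, expressed with the interfaces defined above.  The virtual
 * and physical addresses are hypothetical, and error handling is
 * reduced to the PMAP_CANFAIL case.  The sketch is kept inside this
 * comment so that it has no effect on the build.
 *
 *	pmap_t pm = pmap_create();
 *
 *	int error = pmap_enter(pm, va, pa,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED | PMAP_CANFAIL);
 *	if (error)
 *		... no memory for PT pages or PV entries; caller may
 *		    wait and retry ...
 *
 *	pmap_protect(pm, va, va + PAGE_SIZE, VM_PROT_READ);
 *	pmap_unwire(pm, va);
 *
 *	paddr_t check;
 *	if (pmap_extract(pm, va, &check))
 *		... check == pa ...
 *
 *	(void) pmap_remove_all(pm);	at exit/exec time
 *	pmap_destroy(pm);		last reference frees the pmap
 */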