      1 /* $NetBSD: pmap.c,v 1.308 2023/12/30 23:07:42 thorpej Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
      5  * 	The NetBSD Foundation, Inc.
      6  * All rights reserved.
      7  *
      8  * This code is derived from software contributed to The NetBSD Foundation
      9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
     10  * NASA Ames Research Center, by Andrew Doran and Mindaugas Rasiukevicius,
     11  * and by Chris G. Demetriou.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32  * POSSIBILITY OF SUCH DAMAGE.
     33  */
     34 
     35 /*
     36  * Copyright (c) 1991, 1993
     37  *	The Regents of the University of California.  All rights reserved.
     38  *
     39  * This code is derived from software contributed to Berkeley by
     40  * the Systems Programming Group of the University of Utah Computer
     41  * Science Department.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. Neither the name of the University nor the names of its contributors
     52  *    may be used to endorse or promote products derived from this software
     53  *    without specific prior written permission.
     54  *
     55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     65  * SUCH DAMAGE.
     66  *
     67  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
     68  */
     69 
     70 /*
     71  * DEC Alpha physical map management code.
     72  *
     73  * History:
     74  *
     75  *	This pmap started life as a Motorola 68851/68030 pmap,
     76  *	written by Mike Hibler at the University of Utah.
     77  *
     78  *	It was modified for the DEC Alpha by Chris Demetriou
     79  *	at Carnegie Mellon University.
     80  *
     81  *	Support for non-contiguous physical memory was added by
     82  *	Jason R. Thorpe of the Numerical Aerospace Simulation
     83  *	Facility, NASA Ames Research Center and Chris Demetriou.
     84  *
     85  *	Page table management and a major cleanup were undertaken
     86  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
     87  *	Avalon Computer Systems and from Chris Demetriou.
     88  *
     89  *	Support for the new UVM pmap interface was written by
     90  *	Jason R. Thorpe.
     91  *
     92  *	Support for ASNs was written by Jason R. Thorpe, again
     93  *	with help from Chris Demetriou and Ross Harvey.
     94  *
     95  *	The locking protocol was written by Jason R. Thorpe,
     96  *	using Chuck Cranor's i386 pmap for UVM as a model.
     97  *
     98  *	TLB shootdown code was written (and then subsequently
     99  *	rewritten some years later, borrowing some ideas from
    100  *	the x86 pmap) by Jason R. Thorpe.
    101  *
    102  *	Multiprocessor modifications by Andrew Doran and
    103  *	Jason R. Thorpe.
    104  *
    105  * Notes:
    106  *
    107  *	All user page table access is done via K0SEG.  Kernel
    108  *	page table access is done via the recursive Virtual Page
    109  *	Table because kernel PT pages are pre-allocated and never
    110  *	freed, so no VPT fault handling is required.
    111  */
    112 
    113 /*
    114  *	Manages physical address maps.
    115  *
    116  *	Since the information managed by this module is
    117  *	also stored by the logical address mapping module,
    118  *	this module may throw away valid virtual-to-physical
    119  *	mappings at almost any time.  However, invalidations
    120  *	of virtual-to-physical mappings must be done as
    121  *	requested.
    122  *
    123  *	In order to cope with hardware architectures which
    124  *	make virtual-to-physical map invalidates expensive,
    125  * this module may delay invalidation or reduced-protection
    126  *	operations until such time as they are actually
    127  *	necessary.  This module is given full information as
    128  *	to which processors are currently using which maps,
    129  *	and to when physical maps must be made correct.
    130  */
    131 
    132 #include "opt_lockdebug.h"
    133 #include "opt_sysv.h"
    134 #include "opt_multiprocessor.h"
    135 
    136 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
    137 
    138 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.308 2023/12/30 23:07:42 thorpej Exp $");
    139 
    140 #include <sys/param.h>
    141 #include <sys/systm.h>
    142 #include <sys/kernel.h>
    143 #include <sys/proc.h>
    144 #include <sys/pool.h>
    145 #include <sys/buf.h>
    146 #include <sys/evcnt.h>
    147 #include <sys/atomic.h>
    148 #include <sys/cpu.h>
    149 
    150 #include <uvm/uvm.h>
    151 
    152 #if defined(MULTIPROCESSOR)
    153 #include <machine/rpb.h>
    154 #endif
    155 
    156 #ifdef DEBUG
    157 #define	PDB_FOLLOW	0x0001
    158 #define	PDB_INIT	0x0002
    159 #define	PDB_ENTER	0x0004
    160 #define	PDB_REMOVE	0x0008
    161 #define	PDB_CREATE	0x0010
    162 #define	PDB_PTPAGE	0x0020
    163 #define	PDB_ASN		0x0040
    164 #define	PDB_BITS	0x0080
    165 #define	PDB_COLLECT	0x0100
    166 #define	PDB_PROTECT	0x0200
    167 #define	PDB_BOOTSTRAP	0x1000
    168 #define	PDB_PARANOIA	0x2000
    169 #define	PDB_WIRING	0x4000
    170 #define	PDB_PVDUMP	0x8000
    171 
    172 int debugmap = 0;
    173 int pmapdebug = PDB_PARANOIA;
    174 #endif
    175 
    176 #if defined(MULTIPROCESSOR)
    177 #define	PMAP_MP(x)	x
    178 #else
    179 #define	PMAP_MP(x)	__nothing
    180 #endif /* MULTIPROCESSOR */
    181 
    182 /*
    183  * Given a map and a machine independent protection code,
    184  * convert to an alpha protection code.
    185  */
    186 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
    187 static int	protection_codes[2][8] __read_mostly;
    188 
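/*
 * Illustrative sketch (not part of the build): roughly how the protection
 * table above is combined with a physical address to form a leaf PTE.
 * The helper name is hypothetical; the real PTE construction happens in
 * pmap_enter() and in the bootstrap code below.
 */
#if 0
static pt_entry_t
example_make_pte(pmap_t pm, paddr_t pa, vm_prot_t prot)
{
	/* PFN in the high bits, validity + protection in the low bits. */
	return ((pa >> PGSHIFT) << PG_SHIFT) | PG_V | pte_prot(pm, prot);
}
#endif
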
    189 /*
    190  * kernel_lev1map:
    191  *
    192  *	Kernel level 1 page table.  This maps all kernel level 2
    193  *	page table pages, and is used as a template for all user
    194  *	pmap level 1 page tables.  When a new user level 1 page
    195  *	table is allocated, all kernel_lev1map PTEs for kernel
    196  *	addresses are copied to the new map.
    197  *
    198  *	The kernel also has an initial set of kernel level 2 page
    199  *	table pages.  These map the kernel level 3 page table pages.
    200  *	As kernel level 3 page table pages are added, more level 2
    201  *	page table pages may be added to map them.  These pages are
    202  *	never freed.
    203  *
    204  *	Finally, the kernel also has an initial set of kernel level
    205  *	3 page table pages.  These map pages in K1SEG.  More level
    206  *	3 page table pages may be added at run-time if additional
    207  *	K1SEG address space is required.  These pages are never freed.
    208  *
    209  * NOTE: When mappings are inserted into the kernel pmap, all
    210  * level 2 and level 3 page table pages must already be allocated
    211  * and mapped into the parent page table.
    212  */
    213 pt_entry_t	*kernel_lev1map __read_mostly;
    214 
    215 /*
    216  * Virtual Page Table.
    217  */
    218 static pt_entry_t *VPT __read_mostly;
    219 
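/*
 * Illustrative sketch (not part of the build): how a kernel virtual
 * address is resolved to its leaf PTE, either by walking the three
 * levels rooted at kernel_lev1map or by indexing the recursive VPT
 * directly.  The helper name is hypothetical; the real variants live
 * in the PMAP_KERNEL_PTE() macro further below.
 */
#if 0
static pt_entry_t *
example_kernel_pte(vaddr_t va)
{
	pt_entry_t *l1pte, *l2pte;

	/* Level 1: a single page, also the template for user pmaps. */
	l1pte = pmap_l1pte(kernel_lev1map, va);
	if (pmap_pte_v(l1pte) == 0)
		return NULL;

	/* Level 2: maps the level 3 PT pages. */
	l2pte = pmap_l2pte(kernel_lev1map, va, l1pte);
	if (pmap_pte_v(l2pte) == 0)
		return NULL;

	/* Level 3: the leaf PTE for va.  Equivalently: &VPT[VPT_INDEX(va)] */
	return pmap_l3pte(kernel_lev1map, va, l2pte);
}
#endif
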
    220 static struct {
    221 	struct pmap k_pmap;
    222 } kernel_pmap_store __cacheline_aligned;
    223 
    224 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap;
    225 
    226 /* PA of first available physical page */
    227 paddr_t    	avail_start __read_mostly;
    228 
    229 /* PA of last available physical page */
    230 paddr_t		avail_end __read_mostly;
    231 
    232 /* VA of last avail page (end of kernel AS) */
    233 static vaddr_t	virtual_end __read_mostly;
    234 
    235 /* Has pmap_init completed? */
    236 static bool pmap_initialized __read_mostly;
    237 
    238 /* Instrumentation */
    239 u_long		pmap_pages_stolen __read_mostly;
    240 
    241 /*
    242  * This variable contains the number of CPU IDs we need to allocate
    243  * space for when allocating the pmap structure.  It is used to
    244  * size a per-CPU array of ASN and ASN Generation number.
    245  */
    246 static u_long 	pmap_ncpuids __read_mostly;
    247 
    248 #ifndef PMAP_PV_LOWAT
    249 #define	PMAP_PV_LOWAT	16
    250 #endif
    251 int		pmap_pv_lowat __read_mostly = PMAP_PV_LOWAT;
    252 
    253 /*
    254  * List of all pmaps, used to update them when e.g. additional kernel
    255  * page tables are allocated.  This list is kept LRU-ordered by
    256  * pmap_activate().
    257  */
    258 static TAILQ_HEAD(, pmap) pmap_all_pmaps __cacheline_aligned;
    259 
    260 /*
    261  * Instrument the number of calls to pmap_growkernel().
    262  */
    263 static struct evcnt pmap_growkernel_evcnt __read_mostly;
    264 
    265 /*
    266  * The pools from which pmap structures and sub-structures are allocated.
    267  */
    268 static struct pool_cache pmap_pmap_cache __read_mostly;
    269 static struct pool_cache pmap_l1pt_cache __read_mostly;
    270 static struct pool_cache pmap_pv_cache __read_mostly;
    271 
    272 CTASSERT(offsetof(struct pmap, pm_percpu[0]) == COHERENCY_UNIT);
    273 CTASSERT(PMAP_SIZEOF(ALPHA_MAXPROCS) < ALPHA_PGBYTES);
    274 CTASSERT(sizeof(struct pmap_percpu) == COHERENCY_UNIT);
    275 
    276 /*
    277  * Address Space Numbers.
    278  *
    279  * On many implementations of the Alpha architecture, the TLB entries and
    280  * I-cache blocks are tagged with a unique number within an implementation-
    281  * specified range.  When a process context becomes active, the ASN is used
    282  * to match TLB entries; if a TLB entry for a particular VA does not match
    283  * the current ASN, it is ignored (one could think of the processor as
    284  * having a collection of <max ASN> separate TLBs).  This allows operating
    285  * system software to skip the TLB flush that would otherwise be necessary
    286  * at context switch time.
    287  *
    288  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
    289  * causes TLB entries to match any ASN.  The PALcode also provides
    290  * a TBI (Translation Buffer Invalidate) operation that flushes all
    291  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
    292  * mappings, so that invalidation of all user mappings does not invalidate
    293  * kernel mappings (which are consistent across all processes).
    294  *
    295  * pmap_next_asn always indicates the next ASN to use.  When
    296  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
    297  *
    298  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
    299  * TLB entries and the I-cache are flushed, the generation number is bumped,
    300  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
    301  *
    302  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.
    303  * This ensures that LWPs running with the kernel pmap make no
    304  * accidental accesses to user space.  This is important because
    305  * the PALcode may use the recursive VPT to service TLB misses.
    306  *
    307  * By reserving an ASN for the kernel, we are guaranteeing that an lwp
    308  * will not see any valid user space TLB entries until it passes through
    309  * pmap_activate() for the first time.
    310  *
    311  * On processors that do not support ASNs, the PALcode invalidates
    312  * non-ASM TLB entries automatically on swpctx.  We completely skip
    313  * the ASN machinery in this case because the PALcode neither reads
    314  * nor writes that field of the HWPCB.
    315  */
    316 
    317 /* max ASN supported by the system */
    318 static u_int	pmap_max_asn __read_mostly;
    319 
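/*
 * Illustrative sketch (not part of the build): the essence of ASN
 * allocation as described above.  This is a simplified stand-in for
 * the real pmap_asn_alloc(); the helper name is hypothetical,
 * generation-wrap handling is omitted, and the pmc_asn field name is
 * assumed (pmc_asngen and pmc_needisync appear elsewhere in this file).
 */
#if 0
static u_int
example_asn_alloc(pmap_t const pmap, struct cpu_info * const ci)
{
	struct pmap_percpu * const pmc = &pmap->pm_percpu[ci->ci_cpuid];

	/* Same generation as this CPU?  The ASN we already have is valid. */
	if (pmc->pmc_asngen == ci->ci_asn_gen)
		return pmc->pmc_asn;

	/* Out of ASNs: flush per-ASN TLB entries, start a new generation. */
	if (ci->ci_next_asn > pmap_max_asn) {
		ALPHA_TBIAP();
		alpha_pal_imb();
		ci->ci_next_asn = PMAP_ASN_FIRST_USER;
		ci->ci_asn_gen++;	/* real code guards against wrap */
	}

	pmc->pmc_asn = ci->ci_next_asn++;
	pmc->pmc_asngen = ci->ci_asn_gen;
	return pmc->pmc_asn;
}
#endif
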
    320 /*
    321  * Locking:
    322  *
    323  *	READ/WRITE LOCKS
    324  *	----------------
    325  *
    326  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
    327  *	  provide mutex access to the pmap module.  Most operations lock
    328  *	  the pmap first, then PV lists as needed.  However, some operations,
    329  *	  such as pmap_page_protect(), lock the PV lists before locking
    330  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
    331  *	  pmap module if locking in the PV->pmap direction.  This is
    332  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
    333  *	  if locking pmap->PV and an (exclusive) write lock if locking in
    334  *	  the PV->pmap direction.  Since only one thread can hold a write
    335  *	  lock at a time, this provides the mutex.
    336  *
    337  *	MUTEXES
    338  *	-------
    339  *
    340  *	* pmap lock (global hash) - These locks protect the pmap structures.
    341  *
    342  *	* pmap activation lock (global hash) - These IPL_SCHED spin locks
    343  *	  synchronize pmap_activate() and TLB shootdowns.  This has a lock
    344  *	  ordering constraint with the tlb_lock:
    345  *
    346  *		tlb_lock -> pmap activation lock
    347  *
    348  *	* pvh_lock (global hash) - These locks protect the PV lists for
    349  *	  managed pages.
    350  *
    351  *	* tlb_lock - This IPL_VM lock serializes local and remote TLB
    352  *	  invalidation.
    353  *
    354  *	* pmap_all_pmaps_lock - This lock protects the global list of
    355  *	  all pmaps.
    356  *
    357  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
    358  *	  and the virtual_end variable.
    359  *
    360  *	  There is a lock ordering constraint for pmap_growkernel_lock.
    361  *	  pmap_growkernel() acquires the locks in the following order:
    362  *
    363  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
    364  *		    pmap lock
    365  *
    366  *	  We need to ensure consistency between user pmaps and the
    367  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
    368  *	  be held to prevent kernel_lev1map changing across pmaps
    369  *	  being added to / removed from the global pmaps list.
    370  *
    371  *	Address space number management (global ASN counters and per-pmap
    372  *	ASN state) is not locked; it uses arrays of values indexed
    373  *	per-processor.
    374  *
    375  *	All internal functions which operate on a pmap are called
    376  *	with the pmap already locked by the caller (which will be
    377  *	an interface function).
    378  */
    379 static krwlock_t pmap_main_lock __cacheline_aligned;
    380 static kmutex_t pmap_all_pmaps_lock __cacheline_aligned;
    381 static krwlock_t pmap_growkernel_lock __cacheline_aligned;
    382 
    383 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
    384 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
    385 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
    386 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
    387 
    388 static union {
    389 	kmutex_t	lock;
    390 	uint8_t		pad[COHERENCY_UNIT];
    391 } pmap_pvh_locks[64] __cacheline_aligned;
    392 
    393 #define	PVH_LOCK_HASH(pg)						\
    394 	((((uintptr_t)(pg)) >> 6) & 63)
    395 
    396 static inline kmutex_t *
    397 pmap_pvh_lock(struct vm_page *pg)
    398 {
    399 	return &pmap_pvh_locks[PVH_LOCK_HASH(pg)].lock;
    400 }
    401 
    402 static union {
    403 	struct {
    404 		kmutex_t	lock;
    405 		kmutex_t	activation_lock;
    406 	} locks;
    407 	uint8_t		pad[COHERENCY_UNIT];
    408 } pmap_pmap_locks[64] __cacheline_aligned;
    409 
    410 #define	PMAP_LOCK_HASH(pm)						\
    411 	((((uintptr_t)(pm)) >> 6) & 63)
    412 
    413 static inline kmutex_t *
    414 pmap_pmap_lock(pmap_t const pmap)
    415 {
    416 	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.lock;
    417 }
    418 
    419 static inline kmutex_t *
    420 pmap_activation_lock(pmap_t const pmap)
    421 {
    422 	return &pmap_pmap_locks[PMAP_LOCK_HASH(pmap)].locks.activation_lock;
    423 }
    424 
    425 #define	PMAP_LOCK(pmap)		mutex_enter(pmap_pmap_lock(pmap))
    426 #define	PMAP_UNLOCK(pmap)	mutex_exit(pmap_pmap_lock(pmap))
    427 
    428 #define	PMAP_ACT_LOCK(pmap)	mutex_spin_enter(pmap_activation_lock(pmap))
    429 #define	PMAP_ACT_TRYLOCK(pmap)	mutex_tryenter(pmap_activation_lock(pmap))
    430 #define	PMAP_ACT_UNLOCK(pmap)	mutex_spin_exit(pmap_activation_lock(pmap))
    431 
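/*
 * Illustrative sketch (not part of the build): the two locking
 * directions described in the locking notes above, expressed with the
 * helpers defined in this file.  The function names are hypothetical;
 * real call sites include pmap_remove() (map->PV direction) and
 * pmap_page_protect() (PV->map direction).
 */
#if 0
static void
example_map_to_head(pmap_t pmap, struct vm_page *pg)
{
	PMAP_MAP_TO_HEAD_LOCK();	/* shared hold on pmap_main_lock */
	PMAP_LOCK(pmap);		/* this pmap's hashed mutex */
	mutex_enter(pmap_pvh_lock(pg));	/* then the page's PV list lock */
	/* ... modify mappings ... */
	mutex_exit(pmap_pvh_lock(pg));
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}

static void
example_head_to_map(struct vm_page *pg, pmap_t pmap)
{
	PMAP_HEAD_TO_MAP_LOCK();	/* exclusive hold on pmap_main_lock */
	mutex_enter(pmap_pvh_lock(pg));	/* walk the PV list... */
	PMAP_LOCK(pmap);		/* ...locking each pmap found there */
	/* ... modify mappings ... */
	PMAP_UNLOCK(pmap);
	mutex_exit(pmap_pvh_lock(pg));
	PMAP_HEAD_TO_MAP_UNLOCK();
}
#endif
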
    432 #if defined(MULTIPROCESSOR)
    433 #define	pmap_all_cpus()		cpus_running
    434 #else
    435 #define	pmap_all_cpus()		~0UL
    436 #endif /* MULTIPROCESSOR */
    437 
    438 /*
    439  * TLB context structure; see description in "TLB management" section
    440  * below.
    441  */
    442 #define	TLB_CTX_MAXVA		8
    443 #define	TLB_CTX_ALLVA		PAGE_MASK
    444 struct pmap_tlb_context {
    445 	uintptr_t		t_addrdata[TLB_CTX_MAXVA];
    446 	pmap_t			t_pmap;
    447 	struct pmap_pagelist	t_freeptq;
    448 	struct pmap_pvlist	t_freepvq;
    449 };
    450 
    451 /*
    452  * Internal routines
    453  */
    454 static void	alpha_protection_init(void);
    455 static pt_entry_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool,
    456 				      pv_entry_t *,
    457 				      struct pmap_tlb_context *);
    458 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t,
    459 			       struct pmap_tlb_context *);
    460 
    461 /*
    462  * PT page management functions.
    463  */
    464 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
    465 static void	pmap_ptpage_free(pmap_t, pt_entry_t *,
    466 				 struct pmap_tlb_context *);
    467 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *,
    468 		     struct pmap_tlb_context *);
    469 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *,
    470 		     struct pmap_tlb_context *);
    471 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *);
    472 
    473 static void	*pmap_l1pt_alloc(struct pool *, int);
    474 static void	pmap_l1pt_free(struct pool *, void *);
    475 
    476 static struct pool_allocator pmap_l1pt_allocator = {
    477 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
    478 };
    479 
    480 static int	pmap_l1pt_ctor(void *, void *, int);
    481 
    482 /*
    483  * PV table management functions.
    484  */
    485 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
    486 			      bool, pv_entry_t);
    487 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool,
    488 			       pv_entry_t *, struct pmap_tlb_context *);
    489 static void	*pmap_pv_page_alloc(struct pool *, int);
    490 static void	pmap_pv_page_free(struct pool *, void *);
    491 
    492 static struct pool_allocator pmap_pv_page_allocator = {
    493 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
    494 };
    495 
    496 #ifdef DEBUG
    497 void	pmap_pv_dump(paddr_t);
    498 #endif
    499 
    500 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
    501 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
    502 
    503 /*
    504  * Generic routine for freeing pages on a pmap_pagelist back to
    505  * the system.
    506  */
    507 static void
    508 pmap_pagelist_free(struct pmap_pagelist * const list)
    509 {
    510 	struct vm_page *pg;
    511 
    512 	while ((pg = LIST_FIRST(list)) != NULL) {
    513 		LIST_REMOVE(pg, pageq.list);
    514 		/* Fix up ref count; it's not always 0 when we get here. */
    515 		PHYSPAGE_REFCNT_SET(pg, 0);
    516 		uvm_pagefree(pg);
    517 	}
    518 }
    519 
    520 /*
    521  * Generic routine for freeing a list of PV entries back to the
    522  * system.
    523  */
    524 static void
    525 pmap_pvlist_free(struct pmap_pvlist * const list)
    526 {
    527 	pv_entry_t pv;
    528 
    529 	while ((pv = LIST_FIRST(list)) != NULL) {
    530 		LIST_REMOVE(pv, pv_link);
    531 		pmap_pv_free(pv);
    532 	}
    533 }
    534 
    535 /*
    536  * TLB management.
    537  *
    538  * TLB invalidations need to be performed on local and remote CPUs
    539  * whenever parts of the PTE that the hardware or PALcode understands
    540  * changes.  In order amortize the cost of these operations, we will
    541  * queue up to 8 addresses to invalidate in a batch.  Any more than
    542  * that, and we will hit the entire TLB.
    543  *
    544  * Some things that add complexity:
    545  *
    546  * ==> ASNs. A CPU may have valid TLB entries for other than the current
    547  *     address space.  We can only invalidate TLB entries for the current
    548  *     address space, so when asked to invalidate a VA for the non-current
    549  *     pmap on a given CPU, we simply invalidate the ASN for that (pmap, CPU)
    550  *     tuple so that a new one is allocated on the next activation on that
    551  *     CPU.  N.B. that for CPUs that don't implement ASNs, SWPCTX does all
    552  *     the work necessary, so we can skip some work in the pmap module
    553  *     itself.
    554  *
    555  *     When a pmap is activated on a given CPU, we set a corresponding
    556  *     bit in pmap::pm_cpus, indicating that it potentially has valid
    557  *     TLB entries for that address space.  This bitmap is then used to
    558  *     determine which remote CPUs need to be notified of invalidations.
    559  *     The bit is cleared when the ASN is invalidated on that CPU.
    560  *
    561  *     In order to serialize with activating an address space on a
    562  *     given CPU (so that we can reliably send notifications only to
    563  *     relevant remote CPUs), we acquire the pmap lock in pmap_activate()
    564  *     and also hold the lock while remote shootdowns take place.
    565  *     This does not apply to the kernel pmap; all CPUs are notified about
    566  *     invalidations for the kernel pmap, and the pmap lock is not held
    567  *     in pmap_activate() for the kernel pmap.
    568  *
    569  * ==> P->V operations (e.g. pmap_page_protect()) may require sending
    570  *     invalidations for multiple address spaces.  We only track one
    571  *     address space at a time, and if we encounter more than one, then
    572  *     the notification each CPU gets is to hit the entire TLB.  Note
    573  *     also that we can't serialize with pmap_activate() in this case,
    574  *     so all CPUs will get the notification, and each checks whether
    575  *     the pmap is current on that CPU when processing the notification.
    576  *
    577  * Invalidation information is gathered into a pmap_tlb_context structure
    578  * that includes room for 8 VAs, the pmap the VAs belong to, a bitmap of
    579  * CPUs to be notified, and a list for PT pages that are freed during
    580  * removal of mappings.  The number of valid addresses in the list as
    581  * well as flags are squeezed into the lower bits of the first two VAs.
    582  * Storage for this structure is allocated on the stack.  We need to be
    583  * careful to keep the size of this structure under control.
    584  *
    585  * When notifying remote CPUs, we acquire the tlb_lock (which also
    586  * blocks IPIs), record the pointer to our context structure, set a
    587  * global bitmap of CPUs to be notified, and then send the IPIs to
    588  * each victim.  While the other CPUs are in-flight, we then perform
    589  * any invalidations necessary on the local CPU.  Once that is done,
    590  * we then wait for the global context pointer to be cleared, which
    591  * will be done by the final remote CPU to complete its work. This
    592  * method reduces cache line contention during processing.
    593  *
    594  * When removing mappings in user pmaps, this implementation frees page
    595  * table pages back to the VM system once they contain no valid mappings.
    596  * As we do this, we must take care to invalidate TLB entries that the
    597  * CPU might hold for the respective recursive VPT mappings.  This must
    598  * be done whenever an L1 or L2 PTE is invalidated.  Until these VPT
    599  * translations are invalidated, the PT pages must not be reused.  For
    600  * this reason, we keep a list of freed PT pages in the context structure
    601  * and drain them off once all invalidations are complete.
    602  *
    603  * NOTE: The value of TLB_CTX_MAXVA is tuned to accommodate the UBC
    604  * window size (defined as 64KB on alpha in <machine/vmparam.h>).
    605  */
    606 
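/*
 * Illustrative sketch (not part of the build): the typical shape of a
 * batched invalidation using the context described above.  The function
 * name and the PTE manipulation step are hypothetical; real users of
 * this pattern include pmap_remove() and pmap_kremove().
 */
#if 0
static void
example_kernel_range_invalidate(vaddr_t sva, vaddr_t eva)
{
	struct pmap_tlb_context tlbctx;
	vaddr_t va;

	pmap_tlb_context_init(&tlbctx, 0);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		/* ... clear or modify the kernel PTE for va here ... */
		pmap_tlb_shootdown(pmap_kernel(), va, PG_ASM, &tlbctx);
	}
	/*
	 * Notify remote CPUs and invalidate locally (more than 8 VAs
	 * degrades to an all-entries invalidate), then free any PT
	 * pages and PV entries queued during removal.
	 */
	pmap_tlb_shootnow(&tlbctx);
	pmap_tlb_context_drain(&tlbctx);
}
#endif
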
    607 #define	TLB_CTX_F_ASM		__BIT(0)
    608 #define	TLB_CTX_F_IMB		__BIT(1)
    609 #define	TLB_CTX_F_KIMB		__BIT(2)
    610 #define	TLB_CTX_F_PV		__BIT(3)
    611 #define	TLB_CTX_F_MULTI		__BIT(4)
    612 
    613 #define	TLB_CTX_COUNT(ctx)	((ctx)->t_addrdata[0] & PAGE_MASK)
    614 #define	TLB_CTX_INC_COUNT(ctx)	 (ctx)->t_addrdata[0]++
    615 #define	TLB_CTX_SET_ALLVA(ctx)	 (ctx)->t_addrdata[0] |= TLB_CTX_ALLVA
    616 
    617 #define	TLB_CTX_FLAGS(ctx)	((ctx)->t_addrdata[1] & PAGE_MASK)
    618 #define	TLB_CTX_SET_FLAG(ctx, f) (ctx)->t_addrdata[1] |= (f)
    619 
    620 #define	TLB_CTX_VA(ctx, i)	((ctx)->t_addrdata[(i)] & ~PAGE_MASK)
    621 #define	TLB_CTX_SETVA(ctx, i, va)					\
    622 	(ctx)->t_addrdata[(i)] = (va) | ((ctx)->t_addrdata[(i)] & PAGE_MASK)
    623 
    624 static struct {
    625 	kmutex_t	lock;
    626 	struct evcnt	events;
    627 } tlb_shootdown __cacheline_aligned;
    628 #define	tlb_lock	tlb_shootdown.lock
    629 #define	tlb_evcnt	tlb_shootdown.events
    630 #if defined(MULTIPROCESSOR)
    631 static const struct pmap_tlb_context *tlb_context __cacheline_aligned;
    632 static unsigned long tlb_pending __cacheline_aligned;
    633 #endif /* MULTIPROCESSOR */
    634 
    635 #if defined(TLB_STATS)
    636 #define	TLB_COUNT_DECL(cnt)	static struct evcnt tlb_stat_##cnt
    637 #define	TLB_COUNT(cnt)		atomic_inc_64(&tlb_stat_##cnt .ev_count)
    638 #define	TLB_COUNT_ATTACH(cnt)						\
    639 	evcnt_attach_dynamic_nozero(&tlb_stat_##cnt, EVCNT_TYPE_MISC,	\
    640 	    NULL, "TLB", #cnt)
    641 
    642 TLB_COUNT_DECL(invalidate_multi_tbia);
    643 TLB_COUNT_DECL(invalidate_multi_tbiap);
    644 TLB_COUNT_DECL(invalidate_multi_imb);
    645 
    646 TLB_COUNT_DECL(invalidate_kern_tbia);
    647 TLB_COUNT_DECL(invalidate_kern_tbis);
    648 TLB_COUNT_DECL(invalidate_kern_imb);
    649 
    650 TLB_COUNT_DECL(invalidate_user_not_current);
    651 TLB_COUNT_DECL(invalidate_user_lazy_imb);
    652 TLB_COUNT_DECL(invalidate_user_tbiap);
    653 TLB_COUNT_DECL(invalidate_user_tbis);
    654 
    655 TLB_COUNT_DECL(shootdown_kernel);
    656 TLB_COUNT_DECL(shootdown_user);
    657 TLB_COUNT_DECL(shootdown_imb);
    658 TLB_COUNT_DECL(shootdown_kimb);
    659 TLB_COUNT_DECL(shootdown_overflow);
    660 
    661 TLB_COUNT_DECL(shootdown_all_user);
    662 TLB_COUNT_DECL(shootdown_all_user_imb);
    663 
    664 TLB_COUNT_DECL(shootdown_pv);
    665 TLB_COUNT_DECL(shootdown_pv_multi);
    666 
    667 TLB_COUNT_DECL(shootnow_over_notify);
    668 TLB_COUNT_DECL(shootnow_remote);
    669 
    670 TLB_COUNT_DECL(reason_remove_kernel);
    671 TLB_COUNT_DECL(reason_remove_user);
    672 TLB_COUNT_DECL(reason_remove_all_user);
    673 TLB_COUNT_DECL(reason_page_protect_read);
    674 TLB_COUNT_DECL(reason_page_protect_none);
    675 TLB_COUNT_DECL(reason_protect);
    676 TLB_COUNT_DECL(reason_enter_kernel);
    677 TLB_COUNT_DECL(reason_enter_user);
    678 TLB_COUNT_DECL(reason_kenter);
    679 TLB_COUNT_DECL(reason_enter_l2pt_delref);
    680 TLB_COUNT_DECL(reason_enter_l3pt_delref);
    681 TLB_COUNT_DECL(reason_kremove);
    682 TLB_COUNT_DECL(reason_clear_modify);
    683 TLB_COUNT_DECL(reason_clear_reference);
    684 TLB_COUNT_DECL(reason_emulate_reference);
    685 
    686 TLB_COUNT_DECL(asn_reuse);
    687 TLB_COUNT_DECL(asn_newgen);
    688 TLB_COUNT_DECL(asn_assign);
    689 
    690 TLB_COUNT_DECL(activate_both_change);
    691 TLB_COUNT_DECL(activate_asn_change);
    692 TLB_COUNT_DECL(activate_ptbr_change);
    693 TLB_COUNT_DECL(activate_swpctx);
    694 TLB_COUNT_DECL(activate_skip_swpctx);
    695 
    696 #else /* ! TLB_STATS */
    697 #define	TLB_COUNT(cnt)		__nothing
    698 #define	TLB_COUNT_ATTACH(cnt)	__nothing
    699 #endif /* TLB_STATS */
    700 
    701 static void
    702 pmap_tlb_init(void)
    703 {
    704 	/* mutex is initialized in pmap_bootstrap(). */
    705 
    706 	evcnt_attach_dynamic_nozero(&tlb_evcnt, EVCNT_TYPE_MISC,
    707 	    NULL, "TLB", "shootdown");
    708 
    709 	TLB_COUNT_ATTACH(invalidate_multi_tbia);
    710 	TLB_COUNT_ATTACH(invalidate_multi_tbiap);
    711 	TLB_COUNT_ATTACH(invalidate_multi_imb);
    712 
    713 	TLB_COUNT_ATTACH(invalidate_kern_tbia);
    714 	TLB_COUNT_ATTACH(invalidate_kern_tbis);
    715 	TLB_COUNT_ATTACH(invalidate_kern_imb);
    716 
    717 	TLB_COUNT_ATTACH(invalidate_user_not_current);
    718 	TLB_COUNT_ATTACH(invalidate_user_lazy_imb);
    719 	TLB_COUNT_ATTACH(invalidate_user_tbiap);
    720 	TLB_COUNT_ATTACH(invalidate_user_tbis);
    721 
    722 	TLB_COUNT_ATTACH(shootdown_kernel);
    723 	TLB_COUNT_ATTACH(shootdown_user);
    724 	TLB_COUNT_ATTACH(shootdown_imb);
    725 	TLB_COUNT_ATTACH(shootdown_kimb);
    726 	TLB_COUNT_ATTACH(shootdown_overflow);
    727 
    728 	TLB_COUNT_ATTACH(shootdown_all_user);
    729 	TLB_COUNT_ATTACH(shootdown_all_user_imb);
    730 
    731 	TLB_COUNT_ATTACH(shootdown_pv);
    732 	TLB_COUNT_ATTACH(shootdown_pv_multi);
    733 
    734 	TLB_COUNT_ATTACH(shootnow_over_notify);
    735 	TLB_COUNT_ATTACH(shootnow_remote);
    736 
    737 	TLB_COUNT_ATTACH(reason_remove_kernel);
    738 	TLB_COUNT_ATTACH(reason_remove_user);
    739 	TLB_COUNT_ATTACH(reason_remove_all_user);
    740 	TLB_COUNT_ATTACH(reason_page_protect_read);
    741 	TLB_COUNT_ATTACH(reason_page_protect_none);
    742 	TLB_COUNT_ATTACH(reason_protect);
    743 	TLB_COUNT_ATTACH(reason_enter_kernel);
    744 	TLB_COUNT_ATTACH(reason_enter_user);
    745 	TLB_COUNT_ATTACH(reason_kenter);
    746 	TLB_COUNT_ATTACH(reason_enter_l2pt_delref);
    747 	TLB_COUNT_ATTACH(reason_enter_l3pt_delref);
    748 	TLB_COUNT_ATTACH(reason_kremove);
    749 	TLB_COUNT_ATTACH(reason_clear_modify);
    750 	TLB_COUNT_ATTACH(reason_clear_reference);
    751 
    752 	TLB_COUNT_ATTACH(asn_reuse);
    753 	TLB_COUNT_ATTACH(asn_newgen);
    754 	TLB_COUNT_ATTACH(asn_assign);
    755 
    756 	TLB_COUNT_ATTACH(activate_both_change);
    757 	TLB_COUNT_ATTACH(activate_asn_change);
    758 	TLB_COUNT_ATTACH(activate_ptbr_change);
    759 	TLB_COUNT_ATTACH(activate_swpctx);
    760 	TLB_COUNT_ATTACH(activate_skip_swpctx);
    761 }
    762 
    763 static inline void
    764 pmap_tlb_context_init(struct pmap_tlb_context * const tlbctx, uintptr_t flags)
    765 {
    766 	/* Initialize the minimum number of fields. */
    767 	tlbctx->t_addrdata[0] = 0;
    768 	tlbctx->t_addrdata[1] = flags;
    769 	tlbctx->t_pmap = NULL;
    770 	LIST_INIT(&tlbctx->t_freeptq);
    771 	LIST_INIT(&tlbctx->t_freepvq);
    772 }
    773 
    774 static void
    775 pmap_tlb_shootdown_internal(pmap_t const pmap, vaddr_t const va,
    776     pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
    777 {
    778 	KASSERT(pmap != NULL);
    779 	KASSERT((va & PAGE_MASK) == 0);
    780 
    781 	/*
    782 	 * Figure out who needs to hear about this, and the scope
    783 	 * of an all-entries invalidate.
    784 	 */
    785 	if (pmap == pmap_kernel()) {
    786 		TLB_COUNT(shootdown_kernel);
    787 		KASSERT(pte_bits & PG_ASM);
    788 		TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_ASM);
    789 
    790 		/* Note if an I-stream sync is also needed. */
    791 		if (pte_bits & PG_EXEC) {
    792 			TLB_COUNT(shootdown_kimb);
    793 			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_KIMB);
    794 		}
    795 	} else {
    796 		TLB_COUNT(shootdown_user);
    797 		KASSERT((pte_bits & PG_ASM) == 0);
    798 
    799 		/* Note if an I-stream sync is also needed. */
    800 		if (pte_bits & PG_EXEC) {
    801 			TLB_COUNT(shootdown_imb);
    802 			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB);
    803 		}
    804 	}
    805 
    806 	KASSERT(tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap);
    807 	tlbctx->t_pmap = pmap;
    808 
    809 	/*
    810 	 * If we're already at the max, just tell each active CPU
    811 	 * to nail everything.
    812 	 */
    813 	const uintptr_t count = TLB_CTX_COUNT(tlbctx);
    814 	if (count > TLB_CTX_MAXVA) {
    815 		return;
    816 	}
    817 	if (count == TLB_CTX_MAXVA) {
    818 		TLB_COUNT(shootdown_overflow);
    819 		TLB_CTX_SET_ALLVA(tlbctx);
    820 		return;
    821 	}
    822 
    823 	TLB_CTX_SETVA(tlbctx, count, va);
    824 	TLB_CTX_INC_COUNT(tlbctx);
    825 }
    826 
    827 static void
    828 pmap_tlb_shootdown(pmap_t const pmap, vaddr_t const va,
    829     pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
    830 {
    831 	KASSERT((TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV) == 0);
    832 	pmap_tlb_shootdown_internal(pmap, va, pte_bits, tlbctx);
    833 }
    834 
    835 static void
    836 pmap_tlb_shootdown_all_user(pmap_t const pmap, pt_entry_t const pte_bits,
    837     struct pmap_tlb_context * const tlbctx)
    838 {
    839 	KASSERT(pmap != pmap_kernel());
    840 
    841 	TLB_COUNT(shootdown_all_user);
    842 
    843 	/* Note if an I-stream sync is also needed. */
    844 	if (pte_bits & PG_EXEC) {
    845 		TLB_COUNT(shootdown_all_user_imb);
    846 		TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_IMB);
    847 	}
    848 
    849 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV) {
    850 		if (tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap) {
    851 			if (tlbctx->t_pmap == NULL) {
    852 				pmap_reference(pmap);
    853 				tlbctx->t_pmap = pmap;
    854 			}
    855 		} else {
    856 			TLB_CTX_SET_FLAG(tlbctx, TLB_CTX_F_MULTI);
    857 		}
    858 	} else {
    859 		KASSERT(tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap);
    860 		tlbctx->t_pmap = pmap;
    861 	}
    862 
    863 	TLB_CTX_SET_ALLVA(tlbctx);
    864 }
    865 
    866 static void
    867 pmap_tlb_shootdown_pv(pmap_t const pmap, vaddr_t const va,
    868     pt_entry_t const pte_bits, struct pmap_tlb_context * const tlbctx)
    869 {
    870 
    871 	KASSERT(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV);
    872 
    873 	TLB_COUNT(shootdown_pv);
    874 
    875 	if (tlbctx->t_pmap == NULL || tlbctx->t_pmap == pmap) {
    876 		if (tlbctx->t_pmap == NULL) {
    877 			pmap_reference(pmap);
    878 			tlbctx->t_pmap = pmap;
    879 		}
    880 		pmap_tlb_shootdown_internal(pmap, va, pte_bits, tlbctx);
    881 	} else {
    882 		TLB_COUNT(shootdown_pv_multi);
    883 		uintptr_t flags = TLB_CTX_F_MULTI;
    884 		if (pmap == pmap_kernel()) {
    885 			KASSERT(pte_bits & PG_ASM);
    886 			flags |= TLB_CTX_F_ASM;
    887 		} else {
    888 			KASSERT((pte_bits & PG_ASM) == 0);
    889 		}
    890 
    891 		/*
    892 		 * No need to distinguish between kernel and user IMB
    893 		 * here; see pmap_tlb_invalidate_multi().
    894 		 */
    895 		if (pte_bits & PG_EXEC) {
    896 			flags |= TLB_CTX_F_IMB;
    897 		}
    898 		TLB_CTX_SET_ALLVA(tlbctx);
    899 		TLB_CTX_SET_FLAG(tlbctx, flags);
    900 	}
    901 }
    902 
    903 static void
    904 pmap_tlb_invalidate_multi(const struct pmap_tlb_context * const tlbctx)
    905 {
    906 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) {
    907 		TLB_COUNT(invalidate_multi_tbia);
    908 		ALPHA_TBIA();
    909 	} else {
    910 		TLB_COUNT(invalidate_multi_tbiap);
    911 		ALPHA_TBIAP();
    912 	}
    913 	if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_IMB | TLB_CTX_F_KIMB)) {
    914 		TLB_COUNT(invalidate_multi_imb);
    915 		alpha_pal_imb();
    916 	}
    917 }
    918 
    919 static void
    920 pmap_tlb_invalidate_kernel(const struct pmap_tlb_context * const tlbctx)
    921 {
    922 	const uintptr_t count = TLB_CTX_COUNT(tlbctx);
    923 
    924 	if (count == TLB_CTX_ALLVA) {
    925 		TLB_COUNT(invalidate_kern_tbia);
    926 		ALPHA_TBIA();
    927 	} else {
    928 		TLB_COUNT(invalidate_kern_tbis);
    929 		for (uintptr_t i = 0; i < count; i++) {
    930 			ALPHA_TBIS(TLB_CTX_VA(tlbctx, i));
    931 		}
    932 	}
    933 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_KIMB) {
    934 		TLB_COUNT(invalidate_kern_imb);
    935 		alpha_pal_imb();
    936 	}
    937 }
    938 
    939 static void
    940 pmap_tlb_invalidate(const struct pmap_tlb_context * const tlbctx,
    941     const struct cpu_info * const ci)
    942 {
    943 	const uintptr_t count = TLB_CTX_COUNT(tlbctx);
    944 
    945 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_MULTI) {
    946 		pmap_tlb_invalidate_multi(tlbctx);
    947 		return;
    948 	}
    949 
    950 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) {
    951 		pmap_tlb_invalidate_kernel(tlbctx);
    952 		return;
    953 	}
    954 
    955 	KASSERT(kpreempt_disabled());
    956 
    957 	pmap_t const pmap = tlbctx->t_pmap;
    958 	KASSERT(pmap != NULL);
    959 
    960 	if (__predict_false(pmap != ci->ci_pmap)) {
    961 		TLB_COUNT(invalidate_user_not_current);
    962 
    963 		/*
    964 		 * For CPUs that don't implement ASNs, the SWPCTX call
    965 		 * does all of the TLB invalidation work for us.
    966 		 */
    967 		if (__predict_false(pmap_max_asn == 0)) {
    968 			return;
    969 		}
    970 
    971 		const u_long cpu_mask = 1UL << ci->ci_cpuid;
    972 
    973 		/*
    974 		 * We cannot directly invalidate the TLB in this case,
    975 		 * so force allocation of a new ASN when the pmap becomes
    976 		 * active again.
    977 		 */
    978 		pmap->pm_percpu[ci->ci_cpuid].pmc_asngen = PMAP_ASNGEN_INVALID;
    979 		atomic_and_ulong(&pmap->pm_cpus, ~cpu_mask);
    980 
    981 		/*
    982 		 * This isn't strictly necessary; when we allocate a
    983 		 * new ASN, we're going to clear this bit and skip
    984 		 * syncing the I-stream.  But we will keep this bit
    985 		 * of accounting for internal consistency.
    986 		 */
    987 		if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) {
    988 			pmap->pm_percpu[ci->ci_cpuid].pmc_needisync = 1;
    989 		}
    990 		return;
    991 	}
    992 
    993 	if (TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_IMB) {
    994 		TLB_COUNT(invalidate_user_lazy_imb);
    995 		pmap->pm_percpu[ci->ci_cpuid].pmc_needisync = 1;
    996 	}
    997 
    998 	if (count == TLB_CTX_ALLVA) {
    999 		/*
   1000 		 * Another option here for CPUs that implement ASNs is
   1001 		 * to allocate a new ASN and do a SWPCTX.  That's almost
   1002 		 * certainly faster than a TBIAP, but would require us
   1003 		 * to synchronize against IPIs in pmap_activate().
   1004 		 */
   1005 		TLB_COUNT(invalidate_user_tbiap);
   1006 		KASSERT((TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_ASM) == 0);
   1007 		ALPHA_TBIAP();
   1008 	} else {
   1009 		TLB_COUNT(invalidate_user_tbis);
   1010 		for (uintptr_t i = 0; i < count; i++) {
   1011 			ALPHA_TBIS(TLB_CTX_VA(tlbctx, i));
   1012 		}
   1013 	}
   1014 }
   1015 
   1016 static void
   1017 pmap_tlb_shootnow(const struct pmap_tlb_context * const tlbctx)
   1018 {
   1019 
   1020 	if (TLB_CTX_COUNT(tlbctx) == 0) {
   1021 		/* No work to do. */
   1022 		return;
   1023 	}
   1024 
   1025 	/*
   1026 	 * Acquire the shootdown mutex.  This will also block IPL_VM
   1027 	 * interrupts and disable preemption.  It is critically important
   1028 	 * that IPIs not be blocked in this routine.
   1029 	 */
   1030 	KASSERT(alpha_pal_rdps() < ALPHA_PSL_IPL_CLOCK);
   1031 	mutex_spin_enter(&tlb_lock);
   1032 	tlb_evcnt.ev_count++;
   1033 
   1034 	const struct cpu_info *ci = curcpu();
   1035 	const u_long this_cpu = 1UL << ci->ci_cpuid;
   1036 	u_long active_cpus;
   1037 	bool activation_locked, activation_lock_tried;
   1038 
   1039 	/*
   1040 	 * Figure out who to notify.  If it's for the kernel or
   1041 	 * multiple address spaces, we notify everybody.  If
   1042 	 * it's a single user pmap, then we try to acquire the
   1043 	 * activation lock so we can get an accurate accounting
   1044 	 * of who needs to be notified.  If we can't acquire
   1045 	 * the activation lock, then just notify everyone and
   1046 	 * let them sort it out when they process the IPI.
   1047 	 */
   1048 	if (TLB_CTX_FLAGS(tlbctx) & (TLB_CTX_F_ASM | TLB_CTX_F_MULTI)) {
   1049 		active_cpus = pmap_all_cpus();
   1050 		activation_locked = false;
   1051 		activation_lock_tried = false;
   1052 	} else {
   1053 		KASSERT(tlbctx->t_pmap != NULL);
   1054 		activation_locked = PMAP_ACT_TRYLOCK(tlbctx->t_pmap);
   1055 		if (__predict_true(activation_locked)) {
   1056 			active_cpus = tlbctx->t_pmap->pm_cpus;
   1057 		} else {
   1058 			TLB_COUNT(shootnow_over_notify);
   1059 			active_cpus = pmap_all_cpus();
   1060 		}
   1061 		activation_lock_tried = true;
   1062 	}
   1063 
   1064 #if defined(MULTIPROCESSOR)
   1065 	/*
   1066 	 * If there are remote CPUs that need to do work, get them
   1067 	 * started now.
   1068 	 */
   1069 	const u_long remote_cpus = active_cpus & ~this_cpu;
   1070 	KASSERT(tlb_context == NULL);
   1071 	if (remote_cpus) {
   1072 		TLB_COUNT(shootnow_remote);
   1073 		tlb_context = tlbctx;
   1074 		tlb_pending = remote_cpus;
   1075 		alpha_multicast_ipi(remote_cpus, ALPHA_IPI_SHOOTDOWN);
   1076 	}
   1077 #endif /* MULTIPROCESSOR */
   1078 
   1079 	/*
   1080 	 * Now that the remotes have been notified, release the
   1081 	 * activation lock.
   1082 	 */
   1083 	if (activation_lock_tried) {
   1084 		if (activation_locked) {
   1085 			KASSERT(tlbctx->t_pmap != NULL);
   1086 			PMAP_ACT_UNLOCK(tlbctx->t_pmap);
   1087 		}
   1088 		/*
   1089 		 * When we tried to acquire the activation lock, we
   1090 		 * raised IPL to IPL_SCHED (even if we ultimately
   1091 		 * failed to acquire the lock), which blocks out IPIs.
   1092 		 * Force our IPL back down to IPL_VM so that we can
   1093 		 * receive IPIs.
   1094 		 */
   1095 		alpha_pal_swpipl(IPL_VM);
   1096 	}
   1097 
   1098 	/*
   1099 	 * Do any work that we might need to do.  We don't need to
   1100 	 * synchronize with activation here because we know that
   1101 	 * for the current CPU, activation status will not change.
   1102 	 */
   1103 	if (active_cpus & this_cpu) {
   1104 		pmap_tlb_invalidate(tlbctx, ci);
   1105 	}
   1106 
   1107 #if defined(MULTIPROCESSOR)
   1108 	/* Wait for remote CPUs to finish. */
   1109 	if (remote_cpus) {
   1110 		int backoff = SPINLOCK_BACKOFF_MIN;
   1111 		u_int spins = 0;
   1112 
   1113 		while (atomic_load_acquire(&tlb_context) != NULL) {
   1114 			SPINLOCK_BACKOFF(backoff);
   1115 			if (spins++ > 0x0fffffff) {
   1116 				printf("TLB LOCAL MASK  = 0x%016lx\n",
   1117 				    this_cpu);
   1118 				printf("TLB REMOTE MASK = 0x%016lx\n",
   1119 				    remote_cpus);
   1120 				printf("TLB REMOTE PENDING = 0x%016lx\n",
   1121 				    tlb_pending);
   1122 				printf("TLB CONTEXT = %p\n", tlb_context);
   1123 				printf("TLB LOCAL IPL = %lu\n",
   1124 				    alpha_pal_rdps());
   1125 				panic("pmap_tlb_shootnow");
   1126 			}
   1127 		}
   1128 	}
   1129 	KASSERT(tlb_context == NULL);
   1130 #endif /* MULTIPROCESSOR */
   1131 
   1132 	mutex_spin_exit(&tlb_lock);
   1133 
   1134 	if (__predict_false(TLB_CTX_FLAGS(tlbctx) & TLB_CTX_F_PV)) {
   1135 		/*
   1136 		 * P->V TLB operations may operate on multiple pmaps.
   1137 		 * The shootdown takes a reference on the first pmap it
   1138 		 * encounters, in order to prevent it from disappearing,
   1139 		 * in the hope that we end up with a single-pmap P->V
   1140 		 * operation (instrumentation shows this is not rare).
   1141 		 *
   1142 		 * Once this shootdown is finished globally, we need to
   1143 		 * release this extra reference.
   1144 		 */
   1145 		KASSERT(tlbctx->t_pmap != NULL);
   1146 		pmap_destroy(tlbctx->t_pmap);
   1147 	}
   1148 }
   1149 
   1150 #if defined(MULTIPROCESSOR)
   1151 void
   1152 pmap_tlb_shootdown_ipi(struct cpu_info * const ci,
   1153 
   1154     struct trapframe * const tf __unused)
   1155 {
   1156 	KASSERT(tlb_context != NULL);
   1157 	pmap_tlb_invalidate(tlb_context, ci);
   1158 	if (atomic_and_ulong_nv(&tlb_pending, ~(1UL << ci->ci_cpuid)) == 0) {
   1159 		atomic_store_release(&tlb_context, NULL);
   1160 	}
   1161 }
   1162 #endif /* MULTIPROCESSOR */
   1163 
   1164 static inline void
   1165 pmap_tlb_context_drain(struct pmap_tlb_context * const tlbctx)
   1166 {
   1167 	if (! LIST_EMPTY(&tlbctx->t_freeptq)) {
   1168 		pmap_pagelist_free(&tlbctx->t_freeptq);
   1169 	}
   1170 	if (! LIST_EMPTY(&tlbctx->t_freepvq)) {
   1171 		pmap_pvlist_free(&tlbctx->t_freepvq);
   1172 	}
   1173 }
   1174 
   1175 /*
   1176  * ASN management functions.
   1177  */
   1178 static u_int	pmap_asn_alloc(pmap_t, struct cpu_info *);
   1179 
   1180 /*
   1181  * Misc. functions.
   1182  */
   1183 static struct vm_page *pmap_physpage_alloc(int);
   1184 static void	pmap_physpage_free(paddr_t);
   1185 static int	pmap_physpage_addref(void *);
   1186 static int	pmap_physpage_delref(void *);
   1187 
   1188 static bool	vtophys_internal(vaddr_t, paddr_t *p);
   1189 
   1190 /*
   1191  * PMAP_KERNEL_PTE:
   1192  *
   1193  *	Get a kernel PTE.
   1194  *
   1195  *	If debugging, do a table walk.  If not debugging, just use
   1196  *	the Virtual Page Table, since all kernel page tables are
   1197  *	pre-allocated and mapped in.
   1198  */
   1199 #ifdef DEBUG
   1200 #define	PMAP_KERNEL_PTE(va)						\
   1201 ({									\
   1202 	pt_entry_t *l1pte_, *l2pte_;					\
   1203 									\
   1204 	l1pte_ = pmap_l1pte(kernel_lev1map, va);			\
   1205 	if (pmap_pte_v(l1pte_) == 0) {					\
   1206 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
   1207 		    "(line %d) pte=%p *pte=0x%016lx\n", (va), __LINE__,	\
   1208 		    l1pte_, *l1pte_);					\
   1209 		panic("PMAP_KERNEL_PTE");				\
   1210 	}								\
   1211 	l2pte_ = pmap_l2pte(kernel_lev1map, va, l1pte_);		\
   1212 	if (pmap_pte_v(l2pte_) == 0) {					\
   1213 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
   1214 		    "(line %d) pte=%p *pte=0x%016lx\n", (va), __LINE__,	\
   1215 		    l2pte_, *l2pte_);					\
   1216 		panic("PMAP_KERNEL_PTE");				\
   1217 	}								\
   1218 	pmap_l3pte(kernel_lev1map, va, l2pte_);				\
   1219 })
   1220 #else
   1221 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
   1222 #endif
   1223 
   1224 /*
   1225  * PMAP_STAT_{INCR,DECR}:
   1226  *
   1227  *	Increment or decrement a pmap statistic.
   1228  */
   1229 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
   1230 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
   1231 
   1232 /*
   1233  * pmap_init_cpu:
   1234  *
   1235  *	Initialize pmap data in the cpu_info.
   1236  */
   1237 void
   1238 pmap_init_cpu(struct cpu_info * const ci)
   1239 {
   1240 	pmap_t const pmap = pmap_kernel();
   1241 
   1242 	/* All CPUs start out using the kernel pmap. */
   1243 	atomic_or_ulong(&pmap->pm_cpus, 1UL << ci->ci_cpuid);
   1244 	pmap_reference(pmap);
   1245 	ci->ci_pmap = pmap;
   1246 
   1247 	/* Initialize ASN allocation logic. */
   1248 	ci->ci_next_asn = PMAP_ASN_FIRST_USER;
   1249 	ci->ci_asn_gen = PMAP_ASNGEN_INITIAL;
   1250 }
   1251 
   1252 /*
   1253  * pmap_bootstrap:
   1254  *
   1255  *	Bootstrap the system to run with virtual memory.
   1256  *
   1257  *	Note: no locking is necessary in this function.
   1258  */
   1259 void
   1260 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
   1261 {
   1262 	vsize_t lev2mapsize, lev3mapsize;
   1263 	pt_entry_t *lev2map, *lev3map;
   1264 	pt_entry_t pte;
   1265 	vsize_t bufsz;
   1266 	struct pcb *pcb;
   1267 	int i;
   1268 
   1269 #ifdef DEBUG
   1270 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
   1271 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
   1272 #endif
   1273 
   1274 	/*
   1275 	 * Compute the number of pages kmem_arena will have.
   1276 	 */
   1277 	kmeminit_nkmempages();
   1278 
   1279 	/*
   1280 	 * Figure out how many initial PTE's are necessary to map the
   1281 	 * kernel.  We also reserve space for kmem_alloc_pageable()
   1282 	 * for vm_fork().
   1283 	 */
   1284 
   1285 	/* Get size of buffer cache and set an upper limit */
   1286 	bufsz = buf_memcalc();
   1287 	buf_setvalimit(bufsz);
   1288 
   1289 	lev3mapsize =
   1290 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
   1291 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
   1292 		(maxproc * UPAGES) + nkmempages;
   1293 
   1294 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
   1295 
   1296 	/*
   1297 	 * Initialize `FYI' variables.  Note we're relying on
   1298 	 * the fact that BSEARCH sorts the vm_physmem[] array
   1299 	 * for us.
   1300 	 */
   1301 	avail_start = ptoa(uvm_physseg_get_avail_start(uvm_physseg_get_first()));
   1302 	avail_end = ptoa(uvm_physseg_get_avail_end(uvm_physseg_get_last()));
   1303 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
   1304 
   1305 #if 0
   1306 	printf("avail_start = 0x%lx\n", avail_start);
   1307 	printf("avail_end = 0x%lx\n", avail_end);
   1308 	printf("virtual_end = 0x%lx\n", virtual_end);
   1309 #endif
   1310 
   1311 	/*
   1312 	 * Allocate a level 1 PTE table for the kernel.
   1313 	 * This is always one page long.
   1314 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
   1315 	 */
   1316 	kernel_lev1map = (pt_entry_t *)
   1317 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
   1318 
   1319 	/*
   1320 	 * Allocate a level 2 PTE table for the kernel.
   1321 	 * These must map all of the level3 PTEs.
   1322 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
   1323 	 */
   1324 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
   1325 	lev2map = (pt_entry_t *)
   1326 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
   1327 
   1328 	/*
   1329 	 * Allocate a level 3 PTE table for the kernel.
   1330 	 * Contains lev3mapsize PTEs.
   1331 	 */
   1332 	lev3map = (pt_entry_t *)
   1333 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
   1334 
   1335 	/*
   1336 	 * Set up level 1 page table
   1337 	 */
   1338 
   1339 	/* Map all of the level 2 pte pages */
   1340 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
   1341 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
   1342 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
   1343 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
   1344 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
   1345 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
   1346 	}
   1347 
   1348 	/* Map the virtual page table */
   1349 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
   1350 	    << PG_SHIFT;
   1351 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
   1352 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
   1353 	VPT = (pt_entry_t *)VPTBASE;
   1354 
   1355 	/*
   1356 	 * Set up level 2 page table.
   1357 	 */
   1358 	/* Map all of the level 3 pte pages */
   1359 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
   1360 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
   1361 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
   1362 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
   1363 		/*
   1364 		 * No need to use l2pte_index() here; it's equivalent
   1365 		 * to just indexing with our loop variable i, but will
   1366 		 * fall over if we end up with more than 1 L2 PT page.
   1367 		 *
   1368 		 * In other words:
   1369 		 *
   1370 		 *	l2pte_index(VM_MIN_KERNEL_ADDRESS +
   1371 		 *	            (i*PAGE_SIZE*NPTEPG))
   1372 		 *
   1373 		 * ...is the same as 'i' so long as i stays below 1024.
   1374 		 */
   1375 		lev2map[i] = pte;
   1376 	}
   1377 
   1378 	/* Initialize the pmap_growkernel_lock. */
   1379 	rw_init(&pmap_growkernel_lock);
   1380 
   1381 	/*
   1382 	 * Set up level three page table (lev3map)
   1383 	 */
   1384 	/* Nothing to do; it's already zero'd */
   1385 
   1386 	/*
   1387 	 * Initialize the pmap pools and list.
   1388 	 */
   1389 	pmap_ncpuids = ncpuids;
   1390 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids),
   1391 	    COHERENCY_UNIT, 0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
   1392 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
   1393 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
   1394 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
   1395 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
   1396 	    NULL, NULL);
   1397 
   1398 	TAILQ_INIT(&pmap_all_pmaps);
   1399 
   1400 	/* Initialize the ASN logic.  See also pmap_init_cpu(). */
   1401 	pmap_max_asn = maxasn;
   1402 
   1403 	/*
   1404 	 * Initialize the locks.
   1405 	 */
   1406 	rw_init(&pmap_main_lock);
   1407 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
   1408 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
   1409 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
   1410 	}
   1411 	for (i = 0; i < __arraycount(pmap_pmap_locks); i++) {
   1412 		mutex_init(&pmap_pmap_locks[i].locks.lock,
   1413 		    MUTEX_DEFAULT, IPL_NONE);
   1414 		mutex_init(&pmap_pmap_locks[i].locks.activation_lock,
   1415 		    MUTEX_SPIN, IPL_SCHED);
   1416 	}
   1417 
   1418 	/*
   1419 	 * This must block any interrupt from which a TLB shootdown
   1420 	 * could be issued, but must NOT block IPIs.
   1421 	 */
   1422 	mutex_init(&tlb_lock, MUTEX_SPIN, IPL_VM);
   1423 
   1424 	/*
   1425 	 * Initialize kernel pmap.  Note that all kernel mappings
   1426 	 * have PG_ASM set, so the ASN doesn't really matter for
   1427 	 * the kernel pmap.  Also, since the kernel pmap always
   1428 	 * references kernel_lev1map, it always has an invalid ASN
   1429 	 * generation.
   1430 	 */
   1431 	memset(pmap_kernel(), 0, sizeof(struct pmap));
   1432 	LIST_INIT(&pmap_kernel()->pm_ptpages);
   1433 	LIST_INIT(&pmap_kernel()->pm_pvents);
   1434 	atomic_store_relaxed(&pmap_kernel()->pm_count, 1);
   1435 	/* Kernel pmap does not have per-CPU info. */
   1436 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
   1437 
   1438 	/*
   1439 	 * Set up lwp0's PCB such that the ptbr points to the right place
   1440 	 * and has the kernel pmap's (really unused) ASN.
   1441 	 */
   1442 	pcb = lwp_getpcb(&lwp0);
   1443 	pcb->pcb_hw.apcb_ptbr =
   1444 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
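	/* apcb_ptbr holds a page frame number, hence the shift by PGSHIFT. */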
   1445 	pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL;
   1446 
   1447 	struct cpu_info * const ci = curcpu();
   1448 	pmap_init_cpu(ci);
   1449 }
   1450 
   1451 /*
   1452  * pmap_virtual_space:		[ INTERFACE ]
   1453  *
   1454  *	Define the initial bounds of the kernel virtual address space.
   1455  */
   1456 void
   1457 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
   1458 {
   1459 
   1460 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
   1461 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
   1462 }
   1463 
   1464 /*
   1465  * pmap_steal_memory:		[ INTERFACE ]
   1466  *
   1467  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
   1468  *	This function allows for early dynamic memory allocation until the
   1469  *	virtual memory system has been bootstrapped.  After that point, either
   1470  *	kmem_alloc or malloc should be used.  This function works by stealing
   1471  *	pages from the (to be) managed page pool, then implicitly mapping the
   1472  *	pages (by using their k0seg addresses) and zeroing them.
   1473  *
   1474  *	It may be used once the physical memory segments have been pre-loaded
   1475  *	into the vm_physmem[] array.  Early memory allocation MUST use this
   1476  *	interface!  This cannot be used after vm_page_startup(), and will
   1477  *	generate a panic if tried.
   1478  *
   1479  *	Note that this memory will never be freed, and in essence it is wired
   1480  *	down.
   1481  *
   1482  *	We must adjust *vstartp and/or *vendp iff we use address space
   1483  *	from the kernel virtual address range defined by pmap_virtual_space().
   1484  *
   1485  *	Note: no locking is necessary in this function.
   1486  */
   1487 vaddr_t
   1488 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
   1489 {
   1490 	int npgs;
   1491 	vaddr_t va;
   1492 	paddr_t pa;
   1493 
   1494 	uvm_physseg_t bank;
   1495 
   1496 	size = round_page(size);
   1497 	npgs = atop(size);
   1498 
   1499 #if 0
   1500 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
   1501 #endif
   1502 
   1503 	for (bank = uvm_physseg_get_first();
   1504 	     uvm_physseg_valid_p(bank);
   1505 	     bank = uvm_physseg_get_next(bank)) {
   1506 		if (uvm.page_init_done == true)
   1507 			panic("pmap_steal_memory: called _after_ bootstrap");
   1508 
   1509 #if 0
   1510 		printf("     bank %d: avail_start 0x%"PRIxPADDR", start 0x%"PRIxPADDR", "
   1511 		    "avail_end 0x%"PRIxPADDR"\n", bank, uvm_physseg_get_avail_start(bank),
   1512 		    uvm_physseg_get_start(bank), uvm_physseg_get_avail_end(bank));
   1513 #endif
   1514 
   1515 		if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank) ||
   1516 		    uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank))
   1517 			continue;
   1518 
   1519 #if 0
   1520 		printf("             avail_end - avail_start = 0x%"PRIxPADDR"\n",
   1521 		    uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank));
   1522 #endif
   1523 
   1524 		if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank)
   1525 		    < npgs)
   1526 			continue;
   1527 
   1528 		/*
   1529 		 * There are enough pages here; steal them!
   1530 		 */
   1531 		pa = ptoa(uvm_physseg_get_start(bank));
   1532 		uvm_physseg_unplug(atop(pa), npgs);
   1533 
   1534 		va = ALPHA_PHYS_TO_K0SEG(pa);
   1535 		memset((void *)va, 0, size);
   1536 		pmap_pages_stolen += npgs;
   1537 		return (va);
   1538 	}
   1539 
   1540 	/*
   1541 	 * If we got here, there was no memory left.
   1542 	 */
   1543 	panic("pmap_steal_memory: no memory to steal");
   1544 }
   1545 
   1546 /*
   1547  * pmap_init:			[ INTERFACE ]
   1548  *
   1549  *	Initialize the pmap module.  Called by vm_init(), to initialize any
   1550  *	structures that the pmap system needs to map virtual memory.
   1551  *
   1552  *	Note: no locking is necessary in this function.
   1553  */
   1554 void
   1555 pmap_init(void)
   1556 {
   1557 
   1558 #ifdef DEBUG
   1559 	if (pmapdebug & PDB_FOLLOW)
   1560 	        printf("pmap_init()\n");
   1561 #endif
   1562 
   1563 	/* initialize protection array */
   1564 	alpha_protection_init();
   1565 
   1566 	/* Initialize TLB handling. */
   1567 	pmap_tlb_init();
   1568 
   1569 	/* Instrument pmap_growkernel(). */
   1570 	evcnt_attach_dynamic_nozero(&pmap_growkernel_evcnt, EVCNT_TYPE_MISC,
   1571 	    NULL, "pmap", "growkernel");
   1572 
   1573 	/*
   1574 	 * Set a low water mark on the pv_entry pool, so that we are
   1575 	 * more likely to have these around even in extreme memory
   1576 	 * starvation.
   1577 	 */
   1578 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
   1579 
   1580 	/*
   1581 	 * Now it is safe to enable pv entry recording.
   1582 	 */
   1583 	pmap_initialized = true;
   1584 
   1585 #if 0
   1586 	for (uvm_physseg_t bank = uvm_physseg_get_first();
   1587 	    uvm_physseg_valid_p(bank);
   1588 	    bank = uvm_physseg_get_next(bank)) {
   1589 		printf("bank %d\n", bank);
   1590 		printf("\tstart = 0x%lx\n", ptoa(uvm_physseg_get_start(bank)));
   1591 		printf("\tend = 0x%lx\n", ptoa(uvm_physseg_get_end(bank)));
   1592 		printf("\tavail_start = 0x%lx\n",
   1593 		    ptoa(uvm_physseg_get_avail_start(bank)));
   1594 		printf("\tavail_end = 0x%lx\n",
   1595 		    ptoa(uvm_physseg_get_avail_end(bank)));
   1596 	}
   1597 #endif
   1598 }
   1599 
   1600 /*
   1601  * pmap_create:			[ INTERFACE ]
   1602  *
   1603  *	Create and return a physical map.
   1604  *
   1605  *	Note: no locking is necessary in this function.
   1606  */
   1607 pmap_t
   1608 pmap_create(void)
   1609 {
   1610 	pmap_t pmap;
   1611 	pt_entry_t *lev1map;
   1612 	int i;
   1613 
   1614 #ifdef DEBUG
   1615 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
   1616 		printf("pmap_create()\n");
   1617 #endif
   1618 
   1619 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
   1620 	memset(pmap, 0, sizeof(*pmap));
   1621 	LIST_INIT(&pmap->pm_ptpages);
   1622 	LIST_INIT(&pmap->pm_pvents);
   1623 
   1624 	atomic_store_relaxed(&pmap->pm_count, 1);
   1625 
   1626  try_again:
   1627 	rw_enter(&pmap_growkernel_lock, RW_READER);
   1628 
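	/*
	 * We cannot wait for memory while holding pmap_growkernel_lock,
	 * so if the level 1 PT page allocation fails, drop the lock,
	 * sleep briefly, and try again; pmap_create() itself must succeed.
	 */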
   1629 	lev1map = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
   1630 	if (__predict_false(lev1map == NULL)) {
   1631 		rw_exit(&pmap_growkernel_lock);
   1632 		(void) kpause("pmap_create", false, hz >> 2, NULL);
   1633 		goto try_again;
   1634 	}
   1635 
   1636 	/*
   1637 	 * There are only kernel mappings at this point; give the pmap
   1638 	 * the kernel ASN.  This will be initialized to correct values
   1639 	 * when the pmap is activated.
   1640 	 *
   1641 	 * We stash a pointer to the pmap's lev1map in each CPU's
   1642 	 * private data.  It remains constant for the life of the
   1643 	 * pmap, and gives us more room in the shared pmap structure.
   1644 	 */
   1645 	for (i = 0; i < pmap_ncpuids; i++) {
   1646 		pmap->pm_percpu[i].pmc_asn = PMAP_ASN_KERNEL;
   1647 		pmap->pm_percpu[i].pmc_asngen = PMAP_ASNGEN_INVALID;
   1648 		pmap->pm_percpu[i].pmc_lev1map = lev1map;
   1649 	}
   1650 
   1651 	mutex_enter(&pmap_all_pmaps_lock);
   1652 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
   1653 	mutex_exit(&pmap_all_pmaps_lock);
   1654 
   1655 	rw_exit(&pmap_growkernel_lock);
   1656 
   1657 	return (pmap);
   1658 }
   1659 
   1660 /*
   1661  * pmap_destroy:		[ INTERFACE ]
   1662  *
   1663  *	Drop the reference count on the specified pmap, releasing
   1664  *	all resources if the reference count drops to zero.
   1665  */
   1666 void
   1667 pmap_destroy(pmap_t pmap)
   1668 {
   1669 
   1670 #ifdef DEBUG
   1671 	if (pmapdebug & PDB_FOLLOW)
   1672 		printf("pmap_destroy(%p)\n", pmap);
   1673 #endif
   1674 
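	/*
	 * Drop our reference.  The membar_release()/membar_acquire() pair
	 * (present on MP kernels) orders all prior stores to the pmap
	 * before the count reaches zero and makes them visible to the
	 * thread that performs the teardown below.
	 */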
   1675 	PMAP_MP(membar_release());
   1676 	KASSERT(atomic_load_relaxed(&pmap->pm_count) > 0);
   1677 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
   1678 		return;
   1679 	PMAP_MP(membar_acquire());
   1680 
   1681 	pt_entry_t *lev1map = pmap_lev1map(pmap);
   1682 
   1683 	rw_enter(&pmap_growkernel_lock, RW_READER);
   1684 
   1685 	/*
   1686 	 * Remove it from the global list of all pmaps.
   1687 	 */
   1688 	mutex_enter(&pmap_all_pmaps_lock);
   1689 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
   1690 	mutex_exit(&pmap_all_pmaps_lock);
   1691 
   1692 	pool_cache_put(&pmap_l1pt_cache, lev1map);
   1693 #ifdef DIAGNOSTIC
   1694 	int i;
   1695 	for (i = 0; i < pmap_ncpuids; i++) {
   1696 		pmap->pm_percpu[i].pmc_lev1map = (pt_entry_t *)0xdeadbeefUL;
   1697 	}
   1698 #endif /* DIAGNOSTIC */
   1699 
   1700 	rw_exit(&pmap_growkernel_lock);
   1701 
   1702 	pool_cache_put(&pmap_pmap_cache, pmap);
   1703 }
   1704 
   1705 /*
   1706  * pmap_reference:		[ INTERFACE ]
   1707  *
   1708  *	Add a reference to the specified pmap.
   1709  */
   1710 void
   1711 pmap_reference(pmap_t pmap)
   1712 {
   1713 	unsigned int newcount __diagused;
   1714 
   1715 #ifdef DEBUG
   1716 	if (pmapdebug & PDB_FOLLOW)
   1717 		printf("pmap_reference(%p)\n", pmap);
   1718 #endif
   1719 
   1720 	newcount = atomic_inc_uint_nv(&pmap->pm_count);
   1721 	KASSERT(newcount != 0);
   1722 }
   1723 
   1724 /*
   1725  * pmap_remove:			[ INTERFACE ]
   1726  *
   1727  *	Remove the given range of addresses from the specified map.
   1728  *
   1729  *	It is assumed that the start and end are properly
   1730  *	rounded to the page size.
   1731  */
   1732 static void
   1733 pmap_remove_internal(pmap_t pmap, vaddr_t sva, vaddr_t eva,
   1734     struct pmap_tlb_context * const tlbctx)
   1735 {
   1736 	pt_entry_t *l1pte, *l2pte, *l3pte;
   1737 	pt_entry_t *saved_l2pte, *saved_l3pte;
   1738 	vaddr_t l1eva, l2eva, l3vptva;
   1739 	pt_entry_t pte_bits;
   1740 
   1741 #ifdef DEBUG
   1742 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
   1743 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
   1744 #endif
   1745 
   1746 	/*
   1747 	 * If this is the kernel pmap, we can use a faster method
   1748 	 * for accessing the PTEs (since the PT pages are always
   1749 	 * resident).
   1750 	 *
   1751 	 * Note that this routine should NEVER be called from an
   1752 	 * interrupt context; pmap_kremove() is used for that.
   1753 	 */
   1754 	if (pmap == pmap_kernel()) {
   1755 		PMAP_MAP_TO_HEAD_LOCK();
   1756 		PMAP_LOCK(pmap);
   1757 
   1758 		while (sva < eva) {
   1759 			l3pte = PMAP_KERNEL_PTE(sva);
   1760 			if (pmap_pte_v(l3pte)) {
   1761 				pte_bits = pmap_remove_mapping(pmap, sva,
   1762 				    l3pte, true, NULL, tlbctx);
   1763 				pmap_tlb_shootdown(pmap, sva, pte_bits,
   1764 				    tlbctx);
   1765 			}
   1766 			sva += PAGE_SIZE;
   1767 		}
   1768 
   1769 		PMAP_MAP_TO_HEAD_UNLOCK();
   1770 		PMAP_UNLOCK(pmap);
   1771 		pmap_tlb_shootnow(tlbctx);
   1772 		/* kernel PT pages are never freed. */
   1773 		KASSERT(LIST_EMPTY(&tlbctx->t_freeptq));
   1774 		/* ...but we might have freed PV entries. */
   1775 		pmap_tlb_context_drain(tlbctx);
   1776 		TLB_COUNT(reason_remove_kernel);
   1777 
   1778 		return;
   1779 	}
   1780 
   1781 	pt_entry_t * const lev1map = pmap_lev1map(pmap);
   1782 
   1783 	KASSERT(sva < VM_MAXUSER_ADDRESS);
   1784 	KASSERT(eva <= VM_MAXUSER_ADDRESS);
   1785 	KASSERT(lev1map != kernel_lev1map);
   1786 
   1787 	PMAP_MAP_TO_HEAD_LOCK();
   1788 	PMAP_LOCK(pmap);
   1789 
   1790 	l1pte = pmap_l1pte(lev1map, sva);
   1791 
   1792 	for (; sva < eva; sva = l1eva, l1pte++) {
   1793 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
   1794 		if (pmap_pte_v(l1pte)) {
   1795 			saved_l2pte = l2pte = pmap_l2pte(lev1map, sva, l1pte);
   1796 
   1797 			/*
   1798 			 * Add a reference to the L2 table so it won't
   1799 			 * get removed from under us.
   1800 			 */
   1801 			pmap_physpage_addref(saved_l2pte);
   1802 
   1803 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
   1804 				l2eva =
   1805 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
   1806 				if (pmap_pte_v(l2pte)) {
   1807 					saved_l3pte = l3pte =
   1808 					    pmap_l3pte(lev1map, sva, l2pte);
   1809 
   1810 					/*
   1811 					 * Add a reference to the L3 table so
   1812 					 * it won't get removed from under us.
   1813 					 */
   1814 					pmap_physpage_addref(saved_l3pte);
   1815 
   1816 					/*
   1817 					 * Remember this sva; if the L3 table
   1818 					 * gets removed, we need to invalidate
   1819 					 * the VPT TLB entry for it.
   1820 					 */
   1821 					l3vptva = sva;
   1822 
   1823 					for (; sva < l2eva && sva < eva;
   1824 					     sva += PAGE_SIZE, l3pte++) {
   1825 						if (!pmap_pte_v(l3pte)) {
   1826 							continue;
   1827 						}
   1828 						pte_bits =
   1829 						    pmap_remove_mapping(
   1830 							pmap, sva,
   1831 							l3pte, true,
   1832 							NULL, tlbctx);
   1833 						pmap_tlb_shootdown(pmap,
   1834 						    sva, pte_bits, tlbctx);
   1835 					}
   1836 
   1837 					/*
   1838 					 * Remove the reference to the L3
   1839 					 * table that we added above.  This
   1840 					 * may free the L3 table.
   1841 					 */
   1842 					pmap_l3pt_delref(pmap, l3vptva,
   1843 					    saved_l3pte, tlbctx);
   1844 				}
   1845 			}
   1846 
   1847 			/*
   1848 			 * Remove the reference to the L2 table that we
   1849 			 * added above.  This may free the L2 table.
   1850 			 */
   1851 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, tlbctx);
   1852 		}
   1853 	}
   1854 
   1855 	PMAP_MAP_TO_HEAD_UNLOCK();
   1856 	PMAP_UNLOCK(pmap);
   1857 	pmap_tlb_shootnow(tlbctx);
   1858 	pmap_tlb_context_drain(tlbctx);
   1859 	TLB_COUNT(reason_remove_user);
   1860 }
   1861 
   1862 void
   1863 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
   1864 {
   1865 	struct pmap_tlb_context tlbctx;
   1866 
   1867 	pmap_tlb_context_init(&tlbctx, 0);
   1868 	pmap_remove_internal(pmap, sva, eva, &tlbctx);
   1869 }
   1870 
   1871 /*
   1872  * pmap_remove_all:		[ INTERFACE ]
   1873  *
   1874  *	Remove all mappings from a pmap in bulk.  This is only called
   1875  *	when it's known that the address space is no longer visible to
   1876  *	any user process (e.g. during exit or exec).
   1877  */
   1878 bool
   1879 pmap_remove_all(pmap_t pmap)
   1880 {
   1881 	struct pmap_tlb_context tlbctx;
   1882 	struct vm_page *pg;
   1883 	pv_entry_t pv;
   1884 
   1885 	KASSERT(pmap != pmap_kernel());
   1886 
   1887 	/*
   1888 	 * This process is pretty simple:
   1889 	 *
   1890 	 * ==> (1) Zero out the user-space portion of the lev1map.
   1891 	 *
   1892 	 * ==> (2) Copy the PT page list to the tlbctx and re-init.
   1893 	 *
   1894 	 * ==> (3) Walk the PV entry list and remove each entry.
   1895 	 *
   1896 	 * ==> (4) Zero the wired and resident count.
   1897 	 *
   1898 	 * Once we've done that, we just need to free everything
   1899 	 * back to the system.
   1900 	 */
   1901 
   1902 	pmap_tlb_context_init(&tlbctx, 0);
   1903 
   1904 	PMAP_MAP_TO_HEAD_LOCK();
   1905 	PMAP_LOCK(pmap);
   1906 
   1907 	/* Step 1 */
   1908 	pt_entry_t * const lev1map = pmap_lev1map(pmap);
   1909 	memset(lev1map, 0,
   1910 	       l1pte_index(VM_MAXUSER_ADDRESS) * sizeof(pt_entry_t));
   1911 
   1912 	/* Step 2 */
   1913 	LIST_MOVE(&pmap->pm_ptpages, &tlbctx.t_freeptq, pageq.list);
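	/*
	 * The PT pages now belong to the TLB context; they will be freed
	 * by pmap_tlb_context_drain() once any outstanding shootdowns
	 * have completed.
	 */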
   1914 
   1915 	/* Fix up the reference count on the lev1map page. */
   1916 	pg = PHYS_TO_VM_PAGE(ALPHA_K0SEG_TO_PHYS((vaddr_t)lev1map));
   1917 	PHYSPAGE_REFCNT_SET(pg, 0);
   1918 
   1919 	/* Step 3 */
   1920 	while ((pv = LIST_FIRST(&pmap->pm_pvents)) != NULL) {
   1921 		KASSERT(pv->pv_pmap == pmap);
   1922 		pmap_pv_remove(pmap, PHYS_TO_VM_PAGE(pmap_pte_pa(pv->pv_pte)),
   1923 		    pv->pv_va, true, NULL, &tlbctx);
   1924 	}
   1925 
   1926 	/* Step 4 */
   1927 	atomic_store_relaxed(&pmap->pm_stats.wired_count, 0);
   1928 	atomic_store_relaxed(&pmap->pm_stats.resident_count, 0);
   1929 
   1930 	pmap_tlb_shootdown_all_user(pmap, PG_EXEC, &tlbctx);
   1931 
   1932 	PMAP_UNLOCK(pmap);
   1933 	PMAP_MAP_TO_HEAD_UNLOCK();
   1934 
   1935 	pmap_tlb_shootnow(&tlbctx);
   1936 	pmap_tlb_context_drain(&tlbctx);
   1937 	TLB_COUNT(reason_remove_all_user);
   1938 
   1939 	return true;
   1940 }
   1941 
   1942 /*
   1943  * pmap_page_protect:		[ INTERFACE ]
   1944  *
   1945  *	Lower the permission for all mappings to a given page to
   1946  *	the permissions specified.
   1947  */
   1948 void
   1949 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
   1950 {
   1951 	pv_entry_t pv, nextpv;
   1952 	pt_entry_t opte;
   1953 	kmutex_t *lock;
   1954 	struct pmap_tlb_context tlbctx;
   1955 
   1956 #ifdef DEBUG
   1957 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
   1958 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
   1959 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
   1960 #endif
   1961 
   1962 	pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV);
   1963 
   1964 	switch (prot) {
   1965 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
   1966 	case VM_PROT_READ|VM_PROT_WRITE:
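		/* Read and write access are retained; nothing to revoke. */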
   1967 		return;
   1968 
   1969 	/* copy_on_write */
   1970 	case VM_PROT_READ|VM_PROT_EXECUTE:
   1971 	case VM_PROT_READ:
   1972 		PMAP_HEAD_TO_MAP_LOCK();
   1973 		lock = pmap_pvh_lock(pg);
   1974 		mutex_enter(lock);
   1975 		for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) {
   1976 			PMAP_LOCK(pv->pv_pmap);
   1977 			opte = atomic_load_relaxed(pv->pv_pte);
   1978 			if (opte & (PG_KWE | PG_UWE)) {
   1979 				atomic_store_relaxed(pv->pv_pte,
   1980 				    opte & ~(PG_KWE | PG_UWE));
   1981 				pmap_tlb_shootdown_pv(pv->pv_pmap, pv->pv_va,
   1982 				    opte, &tlbctx);
   1983 			}
   1984 			PMAP_UNLOCK(pv->pv_pmap);
   1985 		}
   1986 		mutex_exit(lock);
   1987 		PMAP_HEAD_TO_MAP_UNLOCK();
   1988 		pmap_tlb_shootnow(&tlbctx);
   1989 		TLB_COUNT(reason_page_protect_read);
   1990 		return;
   1991 
   1992 	/* remove_all */
   1993 	default:
   1994 		break;
   1995 	}
   1996 
   1997 	PMAP_HEAD_TO_MAP_LOCK();
   1998 	lock = pmap_pvh_lock(pg);
   1999 	mutex_enter(lock);
   2000 	for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = nextpv) {
   2001 		pt_entry_t pte_bits;
   2002 		pmap_t pmap;
   2003 		vaddr_t va;
   2004 
   2005 		nextpv = pv->pv_next;
   2006 
   2007 		PMAP_LOCK(pv->pv_pmap);
   2008 		pmap = pv->pv_pmap;
   2009 		va = pv->pv_va;
   2010 		pte_bits = pmap_remove_mapping(pmap, va, pv->pv_pte,
   2011 		    false, NULL, &tlbctx);
   2012 		pmap_tlb_shootdown_pv(pmap, va, pte_bits, &tlbctx);
   2013 		PMAP_UNLOCK(pv->pv_pmap);
   2014 	}
   2015 	mutex_exit(lock);
   2016 	PMAP_HEAD_TO_MAP_UNLOCK();
   2017 	pmap_tlb_shootnow(&tlbctx);
   2018 	pmap_tlb_context_drain(&tlbctx);
   2019 	TLB_COUNT(reason_page_protect_none);
   2020 }
   2021 
   2022 /*
   2023  * pmap_protect:		[ INTERFACE ]
   2024  *
   2025  *	Set the physical protection on the specified range of this map
   2026  *	as requested.
   2027  */
   2028 void
   2029 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
   2030 {
   2031 	pt_entry_t *l1pte, *l2pte, *l3pte, opte;
   2032 	vaddr_t l1eva, l2eva;
   2033 	struct pmap_tlb_context tlbctx;
   2034 
   2035 #ifdef DEBUG
   2036 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
   2037 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
   2038 		    pmap, sva, eva, prot);
   2039 #endif
   2040 
   2041 	pmap_tlb_context_init(&tlbctx, 0);
   2042 
   2043 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
   2044 		pmap_remove_internal(pmap, sva, eva, &tlbctx);
   2045 		return;
   2046 	}
   2047 
   2048 	const pt_entry_t bits = pte_prot(pmap, prot);
   2049 	pt_entry_t * const lev1map = pmap_lev1map(pmap);
   2050 
   2051 	PMAP_LOCK(pmap);
   2052 
   2053 	l1pte = pmap_l1pte(lev1map, sva);
   2054 	for (; sva < eva; sva = l1eva, l1pte++) {
   2055 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
   2056 		if (pmap_pte_v(l1pte)) {
   2057 			l2pte = pmap_l2pte(lev1map, sva, l1pte);
   2058 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
   2059 				l2eva =
   2060 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
   2061 				if (pmap_pte_v(l2pte)) {
   2062 					l3pte = pmap_l3pte(lev1map, sva, l2pte);
   2063 					for (; sva < l2eva && sva < eva;
   2064 					     sva += PAGE_SIZE, l3pte++) {
   2065 						if (pmap_pte_v(l3pte) &&
   2066 						    pmap_pte_prot_chg(l3pte,
   2067 								      bits)) {
   2068 							opte = atomic_load_relaxed(l3pte);
   2069 							pmap_pte_set_prot(l3pte,
   2070 							   bits);
   2071 							pmap_tlb_shootdown(pmap,
   2072 							    sva, opte, &tlbctx);
   2073 						}
   2074 					}
   2075 				}
   2076 			}
   2077 		}
   2078 	}
   2079 
   2080 	PMAP_UNLOCK(pmap);
   2081 	pmap_tlb_shootnow(&tlbctx);
   2082 	TLB_COUNT(reason_protect);
   2083 }
   2084 
   2085 /*
   2086  * pmap_enter_tlb_shootdown:
   2087  *
   2088  *	Carry out a TLB shootdown on behalf of a pmap_enter()
   2089  *	or a pmap_kenter_pa().  This is factored out separately
   2090  *	because we do not expect it to be a common case.
   2091  */
   2092 static void __noinline
   2093 pmap_enter_tlb_shootdown(pmap_t const pmap, vaddr_t const va,
   2094     pt_entry_t const pte_bits, bool locked)
   2095 {
   2096 	struct pmap_tlb_context tlbctx;
   2097 
   2098 	pmap_tlb_context_init(&tlbctx, 0);
   2099 	pmap_tlb_shootdown(pmap, va, pte_bits, &tlbctx);
   2100 	if (locked) {
   2101 		PMAP_UNLOCK(pmap);
   2102 	}
   2103 	pmap_tlb_shootnow(&tlbctx);
   2104 }
   2105 
   2106 /*
   2107  * pmap_enter_l2pt_delref:
   2108  *
   2109  *	Release a reference on an L2 PT page for pmap_enter().
   2110  *	This is factored out separately because we expect it
   2111  *	to be a rare case.
   2112  */
   2113 static void __noinline
   2114 pmap_enter_l2pt_delref(pmap_t const pmap, pt_entry_t * const l1pte,
   2115     pt_entry_t * const l2pte)
   2116 {
   2117 	struct pmap_tlb_context tlbctx;
   2118 
   2119 	/*
   2120 	 * PALcode may have tried to service a TLB miss with
   2121 	 * this L2 PTE, so we need to make sure we don't actually
   2122 	 * free the PT page until we've shot down any TLB entries
   2123 	 * for this VPT index.
   2124 	 */
   2125 
   2126 	pmap_tlb_context_init(&tlbctx, 0);
   2127 	pmap_l2pt_delref(pmap, l1pte, l2pte, &tlbctx);
   2128 	PMAP_UNLOCK(pmap);
   2129 	pmap_tlb_shootnow(&tlbctx);
   2130 	pmap_tlb_context_drain(&tlbctx);
   2131 	TLB_COUNT(reason_enter_l2pt_delref);
   2132 }
   2133 
   2134 /*
   2135  * pmap_enter_l3pt_delref:
   2136  *
   2137  *	Release a reference on an L3 PT page for pmap_enter().
   2138  *	This is factored out separately because we expect it
   2139  *	to be a rare case.
   2140  */
   2141 static void __noinline
   2142 pmap_enter_l3pt_delref(pmap_t const pmap, vaddr_t const va,
   2143     pt_entry_t * const pte)
   2144 {
   2145 	struct pmap_tlb_context tlbctx;
   2146 
   2147 	/*
   2148 	 * PALcode may have tried to service a TLB miss with
   2149 	 * this PTE, so we need to make sure we don't actually
   2150 	 * free the PT page until we've shot down any TLB entries
   2151 	 * for this VPT index.
   2152 	 */
   2153 
   2154 	pmap_tlb_context_init(&tlbctx, 0);
   2155 	pmap_l3pt_delref(pmap, va, pte, &tlbctx);
   2156 	PMAP_UNLOCK(pmap);
   2157 	pmap_tlb_shootnow(&tlbctx);
   2158 	pmap_tlb_context_drain(&tlbctx);
   2159 	TLB_COUNT(reason_enter_l3pt_delref);
   2160 }
   2161 
   2162 /*
   2163  * pmap_enter:			[ INTERFACE ]
   2164  *
   2165  *	Insert the given physical page (p) at
   2166  *	the specified virtual address (v) in the
   2167  *	target physical map with the protection requested.
   2168  *
   2169  *	If specified, the page will be wired down, meaning
   2170  *	that the related pte can not be reclaimed.
   2171  *
   2172  *	Note:  This is the only routine which MAY NOT lazy-evaluate
   2173  *	or lose information.  That is, this routine must actually
   2174  *	insert this page into the given map NOW.
   2175  */
   2176 int
   2177 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
   2178 {
   2179 	pt_entry_t *pte, npte, opte;
   2180 	pv_entry_t opv = NULL;
   2181 	paddr_t opa;
   2182 	bool tflush = false;
   2183 	int error = 0;
   2184 	kmutex_t *lock;
   2185 
   2186 #ifdef DEBUG
   2187 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
   2188 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
   2189 		       pmap, va, pa, prot, flags);
   2190 #endif
   2191 	struct vm_page * const pg = PHYS_TO_VM_PAGE(pa);
   2192 	const bool wired = (flags & PMAP_WIRED) != 0;
   2193 
   2194 	PMAP_MAP_TO_HEAD_LOCK();
   2195 	PMAP_LOCK(pmap);
   2196 
   2197 	if (pmap == pmap_kernel()) {
   2198 		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
   2199 		pte = PMAP_KERNEL_PTE(va);
   2200 	} else {
   2201 		pt_entry_t *l1pte, *l2pte;
   2202 		pt_entry_t * const lev1map = pmap_lev1map(pmap);
   2203 
   2204 		KASSERT(va < VM_MAXUSER_ADDRESS);
   2205 		KASSERT(lev1map != kernel_lev1map);
   2206 
   2207 		/*
   2208 		 * Check to see if the level 1 PTE is valid, and
   2209 		 * allocate a new level 2 page table page if it's not.
   2210 		 * A reference will be added to the level 2 table when
   2211 		 * the level 3 table is created.
   2212 		 */
   2213 		l1pte = pmap_l1pte(lev1map, va);
   2214 		if (pmap_pte_v(l1pte) == 0) {
   2215 			pmap_physpage_addref(l1pte);
   2216 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
   2217 			if (error) {
   2218 				pmap_l1pt_delref(pmap, l1pte);
   2219 				if (flags & PMAP_CANFAIL)
   2220 					goto out;
   2221 				panic("pmap_enter: unable to create L2 PT "
   2222 				    "page");
   2223 			}
   2224 #ifdef DEBUG
   2225 			if (pmapdebug & PDB_PTPAGE)
   2226 				printf("pmap_enter: new level 2 table at "
   2227 				    "0x%lx\n", pmap_pte_pa(l1pte));
   2228 #endif
   2229 		}
   2230 
   2231 		/*
   2232 		 * Check to see if the level 2 PTE is valid, and
   2233 		 * allocate a new level 3 page table page if it's not.
   2234 		 * A reference will be added to the level 3 table when
   2235 		 * the mapping is validated.
   2236 		 */
   2237 		l2pte = pmap_l2pte(lev1map, va, l1pte);
   2238 		if (pmap_pte_v(l2pte) == 0) {
   2239 			pmap_physpage_addref(l2pte);
   2240 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
   2241 			if (error) {
   2242 				/* unlocks pmap */
   2243 				pmap_enter_l2pt_delref(pmap, l1pte, l2pte);
   2244 				if (flags & PMAP_CANFAIL) {
   2245 					PMAP_LOCK(pmap);
   2246 					goto out;
   2247 				}
   2248 				panic("pmap_enter: unable to create L3 PT "
   2249 				    "page");
   2250 			}
   2251 #ifdef DEBUG
   2252 			if (pmapdebug & PDB_PTPAGE)
   2253 				printf("pmap_enter: new level 3 table at "
   2254 				    "0x%lx\n", pmap_pte_pa(l2pte));
   2255 #endif
   2256 		}
   2257 
   2258 		/*
   2259 		 * Get the PTE that will map the page.
   2260 		 */
   2261 		pte = pmap_l3pte(lev1map, va, l2pte);
   2262 	}
   2263 
   2264 	/* Remember the entire old PTE; it is used for the TBI check later. */
   2265 	opte = atomic_load_relaxed(pte);
   2266 
   2267 	/*
   2268 	 * Check to see if the old mapping is valid.  If not, validate the
   2269 	 * new one immediately.
   2270 	 */
   2271 	if ((opte & PG_V) == 0) {
   2272 		/* No TLB invalidations needed for new mappings. */
   2273 
   2274 		if (pmap != pmap_kernel()) {
   2275 			/*
   2276 			 * New mappings gain a reference on the level 3
   2277 			 * table.
   2278 			 */
   2279 			pmap_physpage_addref(pte);
   2280 		}
   2281 		goto validate_enterpv;
   2282 	}
   2283 
   2284 	opa = pmap_pte_pa(pte);
   2285 
   2286 	if (opa == pa) {
   2287 		/*
   2288 		 * Mapping has not changed; must be a protection or
   2289 		 * wiring change.
   2290 		 */
   2291 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
   2292 #ifdef DEBUG
   2293 			if (pmapdebug & PDB_ENTER)
   2294 				printf("pmap_enter: wiring change -> %d\n",
   2295 				    wired);
   2296 #endif
   2297 			/* Adjust the wiring count. */
   2298 			if (wired)
   2299 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
   2300 			else
   2301 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
   2302 		}
   2303 
   2304 		/* Set the PTE. */
   2305 		goto validate;
   2306 	}
   2307 
   2308 	/*
   2309 	 * The mapping has changed.  We need to invalidate the
   2310 	 * old mapping before creating the new one.
   2311 	 */
   2312 #ifdef DEBUG
   2313 	if (pmapdebug & PDB_ENTER)
   2314 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
   2315 #endif
   2316 	if (pmap != pmap_kernel()) {
   2317 		/*
   2318 		 * Gain an extra reference on the level 3 table.
   2319 		 * pmap_remove_mapping() will delete a reference,
   2320 		 * and we don't want the table to be erroneously
   2321 		 * freed.
   2322 		 */
   2323 		pmap_physpage_addref(pte);
   2324 	}
   2325 	/* Already have the bits from opte above. */
   2326 	(void) pmap_remove_mapping(pmap, va, pte, true, &opv, NULL);
   2327 
   2328  validate_enterpv:
   2329 	/* Enter the mapping into the pv_table if appropriate. */
   2330 	if (pg != NULL) {
   2331 		error = pmap_pv_enter(pmap, pg, va, pte, true, opv);
   2332 		if (error) {
   2333 			/* This can only fail if opv == NULL */
   2334 			KASSERT(opv == NULL);
   2335 
   2336 			/* unlocks pmap */
   2337 			pmap_enter_l3pt_delref(pmap, va, pte);
   2338 			if (flags & PMAP_CANFAIL) {
   2339 				PMAP_LOCK(pmap);
   2340 				goto out;
   2341 			}
   2342 			panic("pmap_enter: unable to enter mapping in PV "
   2343 			    "table");
   2344 		}
   2345 		opv = NULL;
   2346 	}
   2347 
   2348 	/* Increment counters. */
   2349 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
   2350 	if (wired)
   2351 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
   2352 
   2353  validate:
   2354 	/* Build the new PTE. */
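	/*
	 * The page frame number occupies the PTE bits starting at
	 * PG_SHIFT; pte_prot() supplies the protection bits set up by
	 * alpha_protection_init().
	 */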
   2355 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
   2356 	if (pg != NULL) {
   2357 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2358 		uintptr_t attrs = 0;
   2359 
   2360 		KASSERT(((flags & VM_PROT_ALL) & ~prot) == 0);
   2361 
   2362 		if (flags & VM_PROT_WRITE)
   2363 			attrs |= (PGA_REFERENCED|PGA_MODIFIED);
   2364 		else if (flags & VM_PROT_ALL)
   2365 			attrs |= PGA_REFERENCED;
   2366 
   2367 		lock = pmap_pvh_lock(pg);
   2368 		mutex_enter(lock);
   2369 		attrs = (md->pvh_listx |= attrs);
   2370 		mutex_exit(lock);
   2371 
   2372 		/* Set up referenced/modified emulation for new mapping. */
   2373 		if ((attrs & PGA_REFERENCED) == 0)
   2374 			npte |= PG_FOR | PG_FOW | PG_FOE;
   2375 		else if ((attrs & PGA_MODIFIED) == 0)
   2376 			npte |= PG_FOW;
   2377 
   2378 		/*
   2379 		 * Mapping was entered on PV list.
   2380 		 */
   2381 		npte |= PG_PVLIST;
   2382 	}
   2383 	if (wired)
   2384 		npte |= PG_WIRED;
   2385 #ifdef DEBUG
   2386 	if (pmapdebug & PDB_ENTER)
   2387 		printf("pmap_enter: new pte = 0x%lx\n", npte);
   2388 #endif
   2389 
   2390 	/*
   2391 	 * If the HW / PALcode portion of the new PTE is the same as the
   2392 	 * old PTE, no TBI is necessary.
   2393 	 */
   2394 	if (opte & PG_V) {
   2395 		tflush = PG_PALCODE(opte) != PG_PALCODE(npte);
   2396 	}
   2397 
   2398 	/* Set the new PTE. */
   2399 	atomic_store_relaxed(pte, npte);
   2400 
   2401 out:
   2402 	PMAP_MAP_TO_HEAD_UNLOCK();
   2403 
   2404 	/*
   2405 	 * Invalidate the TLB entry for this VA and any appropriate
   2406 	 * caches.
   2407 	 */
   2408 	if (tflush) {
   2409 		/* unlocks pmap */
   2410 		pmap_enter_tlb_shootdown(pmap, va, opte, true);
   2411 		if (pmap == pmap_kernel()) {
   2412 			TLB_COUNT(reason_enter_kernel);
   2413 		} else {
   2414 			TLB_COUNT(reason_enter_user);
   2415 		}
   2416 	} else {
   2417 		PMAP_UNLOCK(pmap);
   2418 	}
   2419 
   2420 	if (opv)
   2421 		pmap_pv_free(opv);
   2422 
   2423 	return error;
   2424 }
   2425 
   2426 /*
   2427  * pmap_kenter_pa:		[ INTERFACE ]
   2428  *
   2429  *	Enter a va -> pa mapping into the kernel pmap without any
   2430  *	physical->virtual tracking.
   2431  *
   2432  *	Note: no locking is necessary in this function.
   2433  */
   2434 void
   2435 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
   2436 {
   2437 	pmap_t const pmap = pmap_kernel();
   2438 
   2439 #ifdef DEBUG
   2440 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
   2441 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
   2442 		    va, pa, prot);
   2443 #endif
   2444 
   2445 	KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
   2446 
   2447 	pt_entry_t * const pte = PMAP_KERNEL_PTE(va);
   2448 
   2449 	/* Build the new PTE. */
   2450 	const pt_entry_t npte =
   2451 	    ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
   2452 	    PG_V | PG_WIRED;
   2453 
   2454 	/* Set the new PTE. */
   2455 	const pt_entry_t opte = atomic_load_relaxed(pte);
   2456 	atomic_store_relaxed(pte, npte);
   2457 
   2458 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
   2459 	PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
   2460 
   2461 	/*
   2462 	 * There should not have been anything here, previously,
   2463 	 * so we can skip TLB shootdowns, etc. in the common case.
   2464 	 */
   2465 	if (__predict_false(opte & PG_V)) {
   2466 		const pt_entry_t diff = npte ^ opte;
   2467 
   2468 		printf_nolog("%s: mapping already present\n", __func__);
   2469 		PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
   2470 		if (diff & PG_WIRED)
   2471 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
   2472 		/* XXX Can't handle this case. */
   2473 		if (diff & PG_PVLIST)
   2474 			panic("pmap_kenter_pa: old mapping was managed");
   2475 
   2476 		pmap_enter_tlb_shootdown(pmap_kernel(), va, opte, false);
   2477 		TLB_COUNT(reason_kenter);
   2478 	}
   2479 }
   2480 
   2481 /*
   2482  * pmap_kremove:		[ INTERFACE ]
   2483  *
   2484  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
   2485  *	for size bytes (assumed to be page rounded).
   2486  */
   2487 void
   2488 pmap_kremove(vaddr_t va, vsize_t size)
   2489 {
   2490 	pt_entry_t *pte, opte;
   2491 	pmap_t const pmap = pmap_kernel();
   2492 	struct pmap_tlb_context tlbctx;
   2493 	int count = 0;
   2494 
   2495 #ifdef DEBUG
   2496 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
   2497 		printf("pmap_kremove(%lx, %lx)\n",
   2498 		    va, size);
   2499 #endif
   2500 
   2501 	pmap_tlb_context_init(&tlbctx, 0);
   2502 
   2503 	KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
   2504 
   2505 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
   2506 		pte = PMAP_KERNEL_PTE(va);
   2507 		opte = atomic_load_relaxed(pte);
   2508 		if (opte & PG_V) {
   2509 			KASSERT((opte & PG_PVLIST) == 0);
   2510 
   2511 			/* Zap the mapping. */
   2512 			atomic_store_relaxed(pte, PG_NV);
   2513 			pmap_tlb_shootdown(pmap, va, opte, &tlbctx);
   2514 
   2515 			count++;
   2516 		}
   2517 	}
   2518 
   2519 	/* Update stats. */
   2520 	if (__predict_true(count != 0)) {
   2521 		PMAP_STAT_DECR(pmap->pm_stats.resident_count, count);
   2522 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, count);
   2523 	}
   2524 
   2525 	pmap_tlb_shootnow(&tlbctx);
   2526 	TLB_COUNT(reason_kremove);
   2527 }
   2528 
   2529 /*
   2530  * pmap_unwire:			[ INTERFACE ]
   2531  *
   2532  *	Clear the wired attribute for a map/virtual-address pair.
   2533  *
   2534  *	The mapping must already exist in the pmap.
   2535  */
   2536 void
   2537 pmap_unwire(pmap_t pmap, vaddr_t va)
   2538 {
   2539 	pt_entry_t *pte;
   2540 
   2541 #ifdef DEBUG
   2542 	if (pmapdebug & PDB_FOLLOW)
   2543 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
   2544 #endif
   2545 
   2546 	PMAP_LOCK(pmap);
   2547 
   2548 	pte = pmap_l3pte(pmap_lev1map(pmap), va, NULL);
   2549 
   2550 	KASSERT(pte != NULL);
   2551 	KASSERT(pmap_pte_v(pte));
   2552 
   2553 	/*
   2554 	 * If wiring actually changed (always?) clear the wire bit and
   2555 	 * update the wire count.  Note that wiring is not a hardware
   2556 	 * characteristic so there is no need to invalidate the TLB.
   2557 	 */
   2558 	if (pmap_pte_w_chg(pte, 0)) {
   2559 		pmap_pte_set_w(pte, false);
   2560 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
   2561 	}
   2562 #ifdef DEBUG
   2563 	else {
   2564 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
   2565 		    "didn't change!\n", pmap, va);
   2566 	}
   2567 #endif
   2568 
   2569 	PMAP_UNLOCK(pmap);
   2570 }
   2571 
   2572 /*
   2573  * pmap_extract:		[ INTERFACE ]
   2574  *
   2575  *	Extract the physical address associated with the given
   2576  *	pmap/virtual address pair.
   2577  */
   2578 bool
   2579 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
   2580 {
   2581 	pt_entry_t *l1pte, *l2pte, *l3pte;
   2582 	paddr_t pa;
   2583 
   2584 #ifdef DEBUG
   2585 	if (pmapdebug & PDB_FOLLOW)
   2586 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
   2587 #endif
   2588 
   2589 	/*
   2590 	 * Take a faster path for the kernel pmap.  Avoids locking,
   2591 	 * handles K0SEG.
   2592 	 */
   2593 	if (__predict_true(pmap == pmap_kernel())) {
   2594 #ifdef DEBUG
   2595 		bool address_is_valid = vtophys_internal(va, pap);
   2596 		if (pmapdebug & PDB_FOLLOW) {
   2597 			if (address_is_valid) {
   2598 				printf("0x%lx (kernel vtophys)\n", *pap);
   2599 			} else {
   2600 				printf("failed (kernel vtophys)\n");
   2601 			}
   2602 		}
   2603 		return address_is_valid;
   2604 #else
   2605 		return vtophys_internal(va, pap);
   2606 #endif
   2607 	}
   2608 
   2609 	pt_entry_t * const lev1map = pmap_lev1map(pmap);
   2610 
   2611 	PMAP_LOCK(pmap);
   2612 
   2613 	l1pte = pmap_l1pte(lev1map, va);
   2614 	if (pmap_pte_v(l1pte) == 0)
   2615 		goto out;
   2616 
   2617 	l2pte = pmap_l2pte(lev1map, va, l1pte);
   2618 	if (pmap_pte_v(l2pte) == 0)
   2619 		goto out;
   2620 
   2621 	l3pte = pmap_l3pte(lev1map, va, l2pte);
   2622 	if (pmap_pte_v(l3pte) == 0)
   2623 		goto out;
   2624 
   2625 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
   2626 	PMAP_UNLOCK(pmap);
   2627 	if (pap != NULL)
   2628 		*pap = pa;
   2629 #ifdef DEBUG
   2630 	if (pmapdebug & PDB_FOLLOW)
   2631 		printf("0x%lx\n", pa);
   2632 #endif
   2633 	return (true);
   2634 
   2635  out:
   2636 	PMAP_UNLOCK(pmap);
   2637 #ifdef DEBUG
   2638 	if (pmapdebug & PDB_FOLLOW)
   2639 		printf("failed\n");
   2640 #endif
   2641 	return (false);
   2642 }
   2643 
   2644 /*
   2645  * pmap_copy:			[ INTERFACE ]
   2646  *
   2647  *	Copy the mapping range specified by src_addr/len
   2648  *	from the source map to the range dst_addr/len
   2649  *	in the destination map.
   2650  *
   2651  *	This routine is only advisory and need not do anything.
   2652  */
   2653 /* call deleted in <machine/pmap.h> */
   2654 
   2655 /*
   2656  * pmap_update:			[ INTERFACE ]
   2657  *
   2658  *	Require that all active physical maps contain no
   2659  *	incorrect entries NOW, by processing any deferred
   2660  *	pmap operations.
   2661  */
   2662 /* call deleted in <machine/pmap.h> */
   2663 
   2664 /*
   2665  * pmap_activate:		[ INTERFACE ]
   2666  *
   2667  *	Activate the pmap used by the specified process.  This includes
   2668  *	reloading the MMU context of the current process, and marking
   2669  *	the pmap in use by the processor.
   2670  */
   2671 void
   2672 pmap_activate(struct lwp *l)
   2673 {
   2674 	struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap;
   2675 	struct pcb * const pcb = lwp_getpcb(l);
   2676 
   2677 #ifdef DEBUG
   2678 	if (pmapdebug & PDB_FOLLOW)
   2679 		printf("pmap_activate(%p)\n", l);
   2680 #endif
   2681 
   2682 	KASSERT(kpreempt_disabled());
   2683 
   2684 	struct cpu_info * const ci = curcpu();
   2685 
   2686 	KASSERT(l == ci->ci_curlwp);
   2687 
   2688 	u_long const old_ptbr = pcb->pcb_hw.apcb_ptbr;
   2689 	u_int const old_asn = pcb->pcb_hw.apcb_asn;
   2690 
   2691 	/*
   2692 	 * We hold the activation lock to synchronize with TLB shootdown.
   2693 	 * The kernel pmap does not require this synchronization because shootdowns
   2694 	 * for the kernel pmap are always sent to all CPUs.
   2695 	 */
   2696 	if (pmap != pmap_kernel()) {
   2697 		PMAP_ACT_LOCK(pmap);
   2698 		pcb->pcb_hw.apcb_asn = pmap_asn_alloc(pmap, ci);
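		/*
		 * Record this CPU in the pmap's CPU mask so the TLB
		 * shootdown code knows the pmap is active here.
		 */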
   2699 		atomic_or_ulong(&pmap->pm_cpus, (1UL << ci->ci_cpuid));
   2700 	} else {
   2701 		pcb->pcb_hw.apcb_asn = PMAP_ASN_KERNEL;
   2702 	}
   2703 	pcb->pcb_hw.apcb_ptbr =
   2704 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)pmap_lev1map(pmap)) >> PGSHIFT;
   2705 
   2706 	/*
   2707 	 * Check to see if the ASN or page table base has changed; if
   2708 	 * so, switch to our own context again so that it will take
   2709 	 * effect.
   2710 	 *
   2711 	 * We test ASN first because it's the most likely value to change.
   2712 	 */
   2713 	if (old_asn != pcb->pcb_hw.apcb_asn ||
   2714 	    old_ptbr != pcb->pcb_hw.apcb_ptbr) {
   2715 		if (old_asn != pcb->pcb_hw.apcb_asn &&
   2716 		    old_ptbr != pcb->pcb_hw.apcb_ptbr) {
   2717 			TLB_COUNT(activate_both_change);
   2718 		} else if (old_asn != pcb->pcb_hw.apcb_asn) {
   2719 			TLB_COUNT(activate_asn_change);
   2720 		} else {
   2721 			TLB_COUNT(activate_ptbr_change);
   2722 		}
   2723 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);
   2724 		TLB_COUNT(activate_swpctx);
   2725 	} else {
   2726 		TLB_COUNT(activate_skip_swpctx);
   2727 	}
   2728 
   2729 	pmap_reference(pmap);
   2730 	ci->ci_pmap = pmap;
   2731 
   2732 	if (pmap != pmap_kernel()) {
   2733 		PMAP_ACT_UNLOCK(pmap);
   2734 	}
   2735 }
   2736 
   2737 /*
   2738  * pmap_deactivate:		[ INTERFACE ]
   2739  *
   2740  *	Mark that the pmap used by the specified process is no longer
   2741  *	in use by the processor.
   2742  */
   2743 void
   2744 pmap_deactivate(struct lwp *l)
   2745 {
   2746 	struct pmap * const pmap = l->l_proc->p_vmspace->vm_map.pmap;
   2747 
   2748 #ifdef DEBUG
   2749 	if (pmapdebug & PDB_FOLLOW)
   2750 		printf("pmap_deactivate(%p)\n", l);
   2751 #endif
   2752 
   2753 	KASSERT(kpreempt_disabled());
   2754 
   2755 	struct cpu_info * const ci = curcpu();
   2756 
   2757 	KASSERT(l == ci->ci_curlwp);
   2758 	KASSERT(pmap == ci->ci_pmap);
   2759 
   2760 	/*
   2761 	 * There is no need to switch to a different PTBR here,
   2762 	 * because a pmap_activate() or SWPCTX is guaranteed
   2763 	 * before whatever lev1map we're on now is invalidated
   2764 	 * or before user space is accessed again.
   2765 	 *
   2766 	 * Because only kernel mappings will be accessed before the
   2767 	 * next pmap_activate() call, we consider our CPU to be on
   2768 	 * the kernel pmap.
   2769 	 */
   2770 	ci->ci_pmap = pmap_kernel();
   2771 	KASSERT(atomic_load_relaxed(&pmap->pm_count) > 1);
   2772 	pmap_destroy(pmap);
   2773 }
   2774 
   2775 /* pmap_zero_page() is in pmap_subr.s */
   2776 
   2777 /* pmap_copy_page() is in pmap_subr.s */
   2778 
   2779 /*
   2780  * pmap_pageidlezero:		[ INTERFACE ]
   2781  *
   2782  *	Page zero'er for the idle loop.  Returns true if the
   2783  *	page was zero'd, false if we aborted for some reason.
   2784  */
   2785 bool
   2786 pmap_pageidlezero(paddr_t pa)
   2787 {
   2788 	u_long *ptr;
   2789 	int i, cnt = PAGE_SIZE / sizeof(u_long);
   2790 
   2791 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
   2792 		if (sched_curcpu_runnable_p()) {
   2793 			/*
   2794 			 * An LWP has become ready.  Abort now,
   2795 			 * so we don't keep it waiting while we
   2796 			 * finish zeroing the page.
   2797 			 */
   2798 			return (false);
   2799 		}
   2800 		*ptr++ = 0;
   2801 	}
   2802 
   2803 	return (true);
   2804 }
   2805 
   2806 /*
   2807  * pmap_clear_modify:		[ INTERFACE ]
   2808  *
   2809  *	Clear the modify bits on the specified physical page.
   2810  */
   2811 bool
   2812 pmap_clear_modify(struct vm_page *pg)
   2813 {
   2814 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2815 	bool rv = false;
   2816 	kmutex_t *lock;
   2817 	struct pmap_tlb_context tlbctx;
   2818 
   2819 #ifdef DEBUG
   2820 	if (pmapdebug & PDB_FOLLOW)
   2821 		printf("pmap_clear_modify(%p)\n", pg);
   2822 #endif
   2823 
   2824 	pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV);
   2825 
   2826 	PMAP_HEAD_TO_MAP_LOCK();
   2827 	lock = pmap_pvh_lock(pg);
   2828 	mutex_enter(lock);
   2829 
   2830 	if (md->pvh_listx & PGA_MODIFIED) {
   2831 		rv = true;
   2832 		pmap_changebit(pg, PG_FOW, ~0UL, &tlbctx);
   2833 		md->pvh_listx &= ~PGA_MODIFIED;
   2834 	}
   2835 
   2836 	mutex_exit(lock);
   2837 	PMAP_HEAD_TO_MAP_UNLOCK();
   2838 
   2839 	pmap_tlb_shootnow(&tlbctx);
   2840 	TLB_COUNT(reason_clear_modify);
   2841 
   2842 	return (rv);
   2843 }
   2844 
   2845 /*
   2846  * pmap_clear_reference:	[ INTERFACE ]
   2847  *
   2848  *	Clear the reference bit on the specified physical page.
   2849  */
   2850 bool
   2851 pmap_clear_reference(struct vm_page *pg)
   2852 {
   2853 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2854 	bool rv = false;
   2855 	kmutex_t *lock;
   2856 	struct pmap_tlb_context tlbctx;
   2857 
   2858 #ifdef DEBUG
   2859 	if (pmapdebug & PDB_FOLLOW)
   2860 		printf("pmap_clear_reference(%p)\n", pg);
   2861 #endif
   2862 
   2863 	pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV);
   2864 
   2865 	PMAP_HEAD_TO_MAP_LOCK();
   2866 	lock = pmap_pvh_lock(pg);
   2867 	mutex_enter(lock);
   2868 
   2869 	if (md->pvh_listx & PGA_REFERENCED) {
   2870 		rv = true;
   2871 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0UL, &tlbctx);
   2872 		md->pvh_listx &= ~PGA_REFERENCED;
   2873 	}
   2874 
   2875 	mutex_exit(lock);
   2876 	PMAP_HEAD_TO_MAP_UNLOCK();
   2877 
   2878 	pmap_tlb_shootnow(&tlbctx);
   2879 	TLB_COUNT(reason_clear_reference);
   2880 
   2881 	return (rv);
   2882 }
   2883 
   2884 /*
   2885  * pmap_is_referenced:		[ INTERFACE ]
   2886  *
   2887  *	Return whether or not the specified physical page is referenced
   2888  *	by any physical maps.
   2889  */
   2890 /* See <machine/pmap.h> */
   2891 
   2892 /*
   2893  * pmap_is_modified:		[ INTERFACE ]
   2894  *
   2895  *	Return whether or not the specified physical page is modified
   2896  *	by any physical maps.
   2897  */
   2898 /* See <machine/pmap.h> */
   2899 
   2900 /*
   2901  * pmap_phys_address:		[ INTERFACE ]
   2902  *
   2903  *	Return the physical address corresponding to the specified
   2904  *	cookie.  Used by the device pager to decode a device driver's
   2905  *	mmap entry point return value.
   2906  *
   2907  *	Note: no locking is necessary in this function.
   2908  */
   2909 paddr_t
   2910 pmap_phys_address(paddr_t ppn)
   2911 {
   2912 
   2913 	return (alpha_ptob(ppn));
   2914 }
   2915 
   2916 /*
   2917  * Miscellaneous support routines follow
   2918  */
   2919 
   2920 /*
   2921  * alpha_protection_init:
   2922  *
   2923  *	Initialize Alpha protection code array.
   2924  *
   2925  *	Note: no locking is necessary in this function.
   2926  */
   2927 static void
   2928 alpha_protection_init(void)
   2929 {
   2930 	int prot, *kp, *up;
   2931 
   2932 	kp = protection_codes[0];
   2933 	up = protection_codes[1];
   2934 
   2935 	for (prot = 0; prot < 8; prot++) {
   2936 		kp[prot] = PG_ASM;
   2937 		up[prot] = 0;
   2938 
   2939 		if (prot & VM_PROT_READ) {
   2940 			kp[prot] |= PG_KRE;
   2941 			up[prot] |= PG_KRE | PG_URE;
   2942 		}
   2943 		if (prot & VM_PROT_WRITE) {
   2944 			kp[prot] |= PG_KWE;
   2945 			up[prot] |= PG_KWE | PG_UWE;
   2946 		}
   2947 		if (prot & VM_PROT_EXECUTE) {
   2948 			kp[prot] |= PG_EXEC | PG_KRE;
   2949 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
   2950 		} else {
   2951 			kp[prot] |= PG_FOE;
   2952 			up[prot] |= PG_FOE;
   2953 		}
   2954 	}
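	/*
	 * For example, the resulting user (up) entry for
	 * VM_PROT_READ|VM_PROT_WRITE is now
	 * PG_KRE|PG_URE|PG_KWE|PG_UWE|PG_FOE: readable and writable from
	 * both modes, with fault-on-execute set because execute
	 * permission was not requested.
	 */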
   2955 }
   2956 
   2957 /*
   2958  * pmap_remove_mapping:
   2959  *
   2960  *	Invalidate a single page denoted by pmap/va.
   2961  *
   2962  *	If (pte != NULL), it is the already computed PTE for the page.
   2963  *
   2964  *	Note: locking in this function is complicated by the fact
   2965  *	that it can be called when the PV list is already locked.
   2966  *	that it can be called when the PV list is already locked
   2967  *	(e.g. by pmap_page_protect()).  In this case, the caller must be
   2968  *	from beneath it.  We assume that the pmap itself is already
   2969  *	locked; dolock applies only to the PV list.
   2970  *
   2971  *	Returns important PTE bits that the caller needs to check for
   2972  *	TLB / I-stream invalidation purposes.
   2973  */
   2974 static pt_entry_t
   2975 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
   2976     bool dolock, pv_entry_t *opvp, struct pmap_tlb_context * const tlbctx)
   2977 {
   2978 	pt_entry_t opte;
   2979 	paddr_t pa;
   2980 	struct vm_page *pg;		/* if != NULL, page is managed */
   2981 
   2982 #ifdef DEBUG
   2983 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
   2984 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %p, %p)\n",
   2985 		       pmap, va, pte, dolock, opvp, tlbctx);
   2986 #endif
   2987 
   2988 	/*
   2989 	 * PTE not provided, compute it from pmap and va.
   2990 	 */
   2991 	if (pte == NULL) {
   2992 		pte = pmap_l3pte(pmap_lev1map(pmap), va, NULL);
   2993 		if (pmap_pte_v(pte) == 0)
   2994 			return 0;
   2995 	}
   2996 
   2997 	opte = *pte;
   2998 
   2999 	pa = PG_PFNUM(opte) << PGSHIFT;
   3000 
   3001 	/*
   3002 	 * Update statistics
   3003 	 */
   3004 	if (pmap_pte_w(pte))
   3005 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
   3006 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
   3007 
   3008 	/*
   3009 	 * Invalidate the PTE after saving the reference modify info.
   3010 	 */
   3011 #ifdef DEBUG
   3012 	if (pmapdebug & PDB_REMOVE)
   3013 		printf("remove: invalidating pte at %p\n", pte);
   3014 #endif
   3015 	atomic_store_relaxed(pte, PG_NV);
   3016 
   3017 	/*
   3018 	 * If we're removing a user mapping, check to see if we
   3019 	 * can free page table pages.
   3020 	 */
   3021 	if (pmap != pmap_kernel()) {
   3022 		/*
   3023 		 * Delete the reference on the level 3 table.  It will
   3024 		 * delete references on the level 2 and 1 tables as
   3025 		 * appropriate.
   3026 		 */
   3027 		pmap_l3pt_delref(pmap, va, pte, tlbctx);
   3028 	}
   3029 
   3030 	if (opte & PG_PVLIST) {
   3031 		/*
   3032 		 * Remove it from the PV table.
   3033 		 */
   3034 		pg = PHYS_TO_VM_PAGE(pa);
   3035 		KASSERT(pg != NULL);
   3036 		pmap_pv_remove(pmap, pg, va, dolock, opvp, tlbctx);
   3037 		KASSERT(opvp == NULL || *opvp != NULL);
   3038 	}
   3039 
   3040 	return opte & (PG_V | PG_ASM | PG_EXEC);
   3041 }
   3042 
   3043 /*
   3044  * pmap_changebit:
   3045  *
   3046  *	Set or clear the specified PTE bits for all mappings on the
   3047  *	specified page.
   3048  *
   3049  *	Note: we assume that the pv_head is already locked, and that
   3050  *	the caller has acquired a PV->pmap mutex so that we can lock
   3051  *	the pmaps as we encounter them.
   3052  */
   3053 static void
   3054 pmap_changebit(struct vm_page *pg, pt_entry_t set, pt_entry_t mask,
   3055     struct pmap_tlb_context * const tlbctx)
   3056 {
   3057 	pv_entry_t pv;
   3058 	pt_entry_t *pte, npte, opte;
   3059 
   3060 #ifdef DEBUG
   3061 	if (pmapdebug & PDB_BITS)
   3062 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
   3063 		    pg, set, mask);
   3064 #endif
   3065 
   3066 	/*
   3067 	 * Loop over all current mappings, setting/clearing bits as appropriate.
   3068 	 */
   3069 	for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) {
   3070 		PMAP_LOCK(pv->pv_pmap);
   3071 
   3072 		pte = pv->pv_pte;
   3073 
   3074 		opte = atomic_load_relaxed(pte);
   3075 		npte = (opte | set) & mask;
   3076 		if (npte != opte) {
   3077 			atomic_store_relaxed(pte, npte);
   3078 			pmap_tlb_shootdown_pv(pv->pv_pmap, pv->pv_va,
   3079 			    opte, tlbctx);
   3080 		}
   3081 		PMAP_UNLOCK(pv->pv_pmap);
   3082 	}
   3083 }
   3084 
   3085 /*
   3086  * pmap_emulate_reference:
   3087  *
   3088  *	Emulate reference and/or modified bit hits.
   3089  *	Return 1 if this was an execute fault on a non-exec mapping,
   3090  *	otherwise return 0.
   3091  */
   3092 int
   3093 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
   3094 {
   3095 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
   3096 	pt_entry_t faultoff, *pte;
   3097 	struct vm_page *pg;
   3098 	paddr_t pa;
   3099 	bool didlock = false;
   3100 	bool exec = false;
   3101 	kmutex_t *lock;
   3102 
   3103 #ifdef DEBUG
   3104 	if (pmapdebug & PDB_FOLLOW)
   3105 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
   3106 		    l, v, user, type);
   3107 #endif
   3108 
   3109 	/*
   3110 	 * Convert process and virtual address to physical address.
   3111 	 */
   3112 	if (v >= VM_MIN_KERNEL_ADDRESS) {
   3113 		if (user)
   3114 			panic("pmap_emulate_reference: user ref to kernel");
   3115 		/*
   3116 		 * No need to lock here; kernel PT pages never go away.
   3117 		 */
   3118 		pte = PMAP_KERNEL_PTE(v);
   3119 	} else {
   3120 #ifdef DIAGNOSTIC
   3121 		if (l == NULL)
   3122 			panic("pmap_emulate_reference: bad proc");
   3123 		if (l->l_proc->p_vmspace == NULL)
   3124 			panic("pmap_emulate_reference: bad p_vmspace");
   3125 #endif
   3126 		PMAP_LOCK(pmap);
   3127 		didlock = true;
   3128 		pte = pmap_l3pte(pmap_lev1map(pmap), v, NULL);
   3129 		/*
   3130 		 * We'll unlock below where we're done with the PTE.
   3131 		 */
   3132 	}
   3133 	exec = pmap_pte_exec(pte);
   3134 	if (!exec && type == ALPHA_MMCSR_FOE) {
   3135 		if (didlock)
   3136 			PMAP_UNLOCK(pmap);
   3137 		return (1);
   3138 	}
   3139 #ifdef DEBUG
   3140 	if (pmapdebug & PDB_FOLLOW) {
   3141 		printf("\tpte = %p, ", pte);
   3142 		printf("*pte = 0x%lx\n", *pte);
   3143 	}
   3144 #endif
   3145 
   3146 	pa = pmap_pte_pa(pte);
   3147 
   3148 	/*
   3149 	 * We're now done with the PTE.  If it was a user pmap, unlock
   3150 	 * it now.
   3151 	 */
   3152 	if (didlock)
   3153 		PMAP_UNLOCK(pmap);
   3154 
   3155 #ifdef DEBUG
   3156 	if (pmapdebug & PDB_FOLLOW)
   3157 		printf("\tpa = 0x%lx\n", pa);
   3158 #endif
   3159 #ifdef DIAGNOSTIC
   3160 	if (!uvm_pageismanaged(pa))
   3161 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
   3162 		      "pa 0x%lx not managed", l, v, user, type, pa);
   3163 #endif
   3164 
   3165 	/*
   3166 	 * Twiddle the appropriate bits to reflect the reference
   3167 	 * and/or modification.
   3168 	 *
   3169 	 * The rules:
   3170 	 * 	(1) always mark page as used, and
   3171 	 *	(2) if it was a write fault, mark page as modified.
   3172 	 */
   3173 	pg = PHYS_TO_VM_PAGE(pa);
   3174 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   3175 	struct pmap_tlb_context tlbctx;
   3176 
   3177 	pmap_tlb_context_init(&tlbctx, TLB_CTX_F_PV);
   3178 
   3179 	PMAP_HEAD_TO_MAP_LOCK();
   3180 	lock = pmap_pvh_lock(pg);
   3181 	mutex_enter(lock);
   3182 
   3183 	if (type == ALPHA_MMCSR_FOW) {
   3184 		md->pvh_listx |= (PGA_REFERENCED|PGA_MODIFIED);
   3185 		faultoff = PG_FOR | PG_FOW;
   3186 	} else {
   3187 		md->pvh_listx |= PGA_REFERENCED;
   3188 		faultoff = PG_FOR;
   3189 		if (exec) {
   3190 			faultoff |= PG_FOE;
   3191 		}
   3192 	}
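	/*
	 * Note that PG_FOW is left set for non-write faults, so the first
	 * write to the page will still fault and mark it PGA_MODIFIED.
	 */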
   3193 	pmap_changebit(pg, 0, ~faultoff, &tlbctx);
   3194 
   3195 	mutex_exit(lock);
   3196 	PMAP_HEAD_TO_MAP_UNLOCK();
   3197 
   3198 	pmap_tlb_shootnow(&tlbctx);
   3199 	TLB_COUNT(reason_emulate_reference);
   3200 
   3201 	return (0);
   3202 }
   3203 
   3204 #ifdef DEBUG
   3205 /*
   3206  * pmap_pv_dump:
   3207  *
   3208  *	Dump the physical->virtual data for the specified page.
   3209  */
   3210 void
   3211 pmap_pv_dump(paddr_t pa)
   3212 {
   3213 	struct vm_page *pg;
   3214 	struct vm_page_md *md;
   3215 	pv_entry_t pv;
   3216 	kmutex_t *lock;
   3217 
   3218 	pg = PHYS_TO_VM_PAGE(pa);
   3219 	md = VM_PAGE_TO_MD(pg);
   3220 
   3221 	lock = pmap_pvh_lock(pg);
   3222 	mutex_enter(lock);
   3223 
   3224 	printf("pa 0x%lx (attrs = 0x%lx):\n", pa, md->pvh_listx & PGA_ATTRS);
   3225 	for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next)
   3226 		printf("     pmap %p, va 0x%lx\n",
   3227 		    pv->pv_pmap, pv->pv_va);
   3228 	printf("\n");
   3229 
   3230 	mutex_exit(lock);
   3231 }
   3232 #endif
   3233 
   3234 /*
   3235  * vtophys:
   3236  *
   3237  *	Return the physical address corresponding to the K0SEG or
   3238  *	K1SEG address provided.
   3239  *
   3240  *	Note: no locking is necessary in this function.
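          *
          *	For example (illustrative), a K0SEG address translates by
          *	simple arithmetic:
          *
          *		vtophys(ALPHA_PHYS_TO_K0SEG(pa)) == pa
          *
          *	whereas K1SEG addresses require a kernel PTE lookup.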
   3241  */
   3242 static bool
   3243 vtophys_internal(vaddr_t const vaddr, paddr_t * const pap)
   3244 {
   3245 	paddr_t pa;
   3246 
   3247 	KASSERT(vaddr >= ALPHA_K0SEG_BASE);
   3248 
   3249 	if (vaddr <= ALPHA_K0SEG_END) {
   3250 		pa = ALPHA_K0SEG_TO_PHYS(vaddr);
   3251 	} else {
   3252 		pt_entry_t * const pte = PMAP_KERNEL_PTE(vaddr);
   3253 		if (__predict_false(! pmap_pte_v(pte))) {
   3254 			return false;
   3255 		}
   3256 		pa = pmap_pte_pa(pte) | (vaddr & PGOFSET);
   3257 	}
   3258 
   3259 	if (pap != NULL) {
   3260 		*pap = pa;
   3261 	}
   3262 
   3263 	return true;
   3264 }
   3265 
   3266 paddr_t
   3267 vtophys(vaddr_t const vaddr)
   3268 {
   3269 	paddr_t pa;
   3270 
   3271 	if (__predict_false(! vtophys_internal(vaddr, &pa)))
   3272 		pa = 0;
   3273 	return pa;
   3274 }
   3275 
   3276 /******************** pv_entry management ********************/
   3277 
   3278 /*
   3279  * pmap_pv_enter:
   3280  *
   3281  *	Add a physical->virtual entry to the pv_table.
   3282  */
   3283 static int
   3284 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
   3285     bool dolock, pv_entry_t newpv)
   3286 {
   3287 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   3288 	kmutex_t *lock;
   3289 
   3290 	/*
   3291 	 * Allocate and fill in the new pv_entry.
   3292 	 */
   3293 	if (newpv == NULL) {
   3294 		newpv = pmap_pv_alloc();
   3295 		if (newpv == NULL)
   3296 			return ENOMEM;
   3297 	}
   3298 	newpv->pv_va = va;
   3299 	newpv->pv_pmap = pmap;
   3300 	newpv->pv_pte = pte;
   3301 
   3302 	if (dolock) {
   3303 		lock = pmap_pvh_lock(pg);
   3304 		mutex_enter(lock);
   3305 	}
   3306 
   3307 #ifdef DEBUG
   3308     {
   3309 	pv_entry_t pv;
   3310 	/*
   3311 	 * Make sure the entry doesn't already exist.
   3312 	 */
   3313 	for (pv = VM_MDPAGE_PVS(pg); pv != NULL; pv = pv->pv_next) {
   3314 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
   3315 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
   3316 			panic("pmap_pv_enter: already in pv table");
   3317 		}
   3318 	}
   3319     }
   3320 #endif
   3321 
   3322 	/*
    3323 	 * ...and put it in the list.  The low bits of pvh_listx hold the
         	 * page attribute bits (PGA_ATTRS), so preserve them when
         	 * prepending the new entry.
   3324 	 */
   3325 	uintptr_t const attrs = md->pvh_listx & PGA_ATTRS;
   3326 	newpv->pv_next = (struct pv_entry *)(md->pvh_listx & ~PGA_ATTRS);
   3327 	md->pvh_listx = (uintptr_t)newpv | attrs;
   3328 	LIST_INSERT_HEAD(&pmap->pm_pvents, newpv, pv_link);
   3329 
   3330 	if (dolock) {
   3331 		mutex_exit(lock);
   3332 	}
   3333 
   3334 	return 0;
   3335 }
   3336 
   3337 /*
   3338  * pmap_pv_remove:
   3339  *
   3340  *	Remove a physical->virtual entry from the pv_table.
   3341  */
   3342 static void
   3343 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock,
   3344     pv_entry_t *opvp, struct pmap_tlb_context * const tlbctx)
   3345 {
   3346 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   3347 	pv_entry_t pv, *pvp;
   3348 	kmutex_t *lock;
   3349 
   3350 	if (dolock) {
   3351 		lock = pmap_pvh_lock(pg);
   3352 		mutex_enter(lock);
   3353 	} else {
   3354 		lock = NULL; /* XXX stupid gcc */
   3355 	}
   3356 
   3357 	/*
   3358 	 * Find the entry to remove.
   3359 	 */
   3360 	for (pvp = (struct pv_entry **)&md->pvh_listx, pv = VM_MDPAGE_PVS(pg);
   3361 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
   3362 		if (pmap == pv->pv_pmap && va == pv->pv_va)
   3363 			break;
   3364 
   3365 	KASSERT(pv != NULL);
   3366 
   3367 	/*
   3368 	 * The page attributes are in the lower 2 bits of the first
   3369 	 * PV entry pointer.  Rather than comparing the pointer address
   3370 	 * and branching, we just always preserve what might be there
   3371 	 * (either attribute bits or zero bits).
   3372 	 */
   3373 	*pvp = (pv_entry_t)((uintptr_t)pv->pv_next |
   3374 			    (((uintptr_t)*pvp) & PGA_ATTRS));
   3375 	LIST_REMOVE(pv, pv_link);
   3376 
   3377 	if (dolock) {
   3378 		mutex_exit(lock);
   3379 	}
   3380 
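         	/*
         	 * Either hand the pv_entry back to the caller, or queue it on
         	 * the TLB context so that it is freed only after the deferred
         	 * shootdown has been processed.
         	 */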
   3381 	if (opvp != NULL) {
   3382 		*opvp = pv;
   3383 	} else {
   3384 		KASSERT(tlbctx != NULL);
   3385 		LIST_INSERT_HEAD(&tlbctx->t_freepvq, pv, pv_link);
   3386 	}
   3387 }
   3388 
   3389 /*
   3390  * pmap_pv_page_alloc:
   3391  *
   3392  *	Allocate a page for the pv_entry pool.
   3393  */
   3394 static void *
   3395 pmap_pv_page_alloc(struct pool *pp, int flags)
   3396 {
   3397 	struct vm_page * const pg = pmap_physpage_alloc(PGU_PVENT);
   3398 	if (__predict_false(pg == NULL)) {
   3399 		return NULL;
   3400 	}
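         	/* Hand back the page via its K0SEG (direct-mapped) address. */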
   3401 	return (void *)ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(pg));
   3402 }
   3403 
   3404 /*
   3405  * pmap_pv_page_free:
   3406  *
   3407  *	Free a pv_entry pool page.
   3408  */
   3409 static void
   3410 pmap_pv_page_free(struct pool *pp, void *v)
   3411 {
   3412 
   3413 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
   3414 }
   3415 
   3416 /******************** misc. functions ********************/
   3417 
   3418 /*
   3419  * pmap_physpage_alloc:
   3420  *
   3421  *	Allocate a single page from the VM system and return the
   3422  *	physical address for that page.
   3423  */
   3424 static struct vm_page *
   3425 pmap_physpage_alloc(int usage)
   3426 {
   3427 	struct vm_page *pg;
   3428 
   3429 	/*
   3430 	 * Don't ask for a zero'd page in the L1PT case -- we will
   3431 	 * properly initialize it in the constructor.
   3432 	 */
   3433 
   3434 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
   3435 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
   3436 	if (pg != NULL) {
   3437 		KASSERT(PHYSPAGE_REFCNT(pg) == 0);
   3438 	}
   3439 	return pg;
   3440 }
   3441 
   3442 /*
   3443  * pmap_physpage_free:
   3444  *
   3445  *	Free the single page table page at the specified physical address.
   3446  */
   3447 static void
   3448 pmap_physpage_free(paddr_t pa)
   3449 {
   3450 	struct vm_page *pg;
   3451 
   3452 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
   3453 		panic("pmap_physpage_free: bogus physical page address");
   3454 
   3455 	KASSERT(PHYSPAGE_REFCNT(pg) == 0);
   3456 
   3457 	uvm_pagefree(pg);
   3458 }
   3459 
   3460 /*
   3461  * pmap_physpage_addref:
   3462  *
   3463  *	Add a reference to the specified special use page.
   3464  */
   3465 static int
   3466 pmap_physpage_addref(void *kva)
   3467 {
   3468 	struct vm_page *pg;
   3469 	paddr_t pa;
   3470 
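         	/* kva is a K0SEG address within the page; find its vm_page. */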
   3471 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
   3472 	pg = PHYS_TO_VM_PAGE(pa);
   3473 
   3474 	KASSERT(PHYSPAGE_REFCNT(pg) < UINT32_MAX);
   3475 
   3476 	return PHYSPAGE_REFCNT_INC(pg);
   3477 }
   3478 
   3479 /*
   3480  * pmap_physpage_delref:
   3481  *
   3482  *	Delete a reference to the specified special use page.
   3483  */
   3484 static int
   3485 pmap_physpage_delref(void *kva)
   3486 {
   3487 	struct vm_page *pg;
   3488 	paddr_t pa;
   3489 
   3490 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
   3491 	pg = PHYS_TO_VM_PAGE(pa);
   3492 
   3493 	KASSERT(PHYSPAGE_REFCNT(pg) != 0);
   3494 
   3495 	return PHYSPAGE_REFCNT_DEC(pg);
   3496 }
   3497 
   3498 /******************** page table page management ********************/
   3499 
   3500 static bool
   3501 pmap_kptpage_alloc(paddr_t *pap)
   3502 {
   3503 	if (uvm.page_init_done == false) {
   3504 		/*
   3505 		 * We're growing the kernel pmap early (from
   3506 		 * uvm_pageboot_alloc()).  This case must
   3507 		 * be handled a little differently.
   3508 		 */
   3509 		*pap = ALPHA_K0SEG_TO_PHYS(
   3510 		    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
   3511 		return true;
   3512 	}
   3513 
   3514 	struct vm_page * const pg = pmap_physpage_alloc(PGU_NORMAL);
   3515 	if (__predict_true(pg != NULL)) {
   3516 		*pap = VM_PAGE_TO_PHYS(pg);
   3517 		return true;
   3518 	}
   3519 	return false;
   3520 }
   3521 
   3522 /*
   3523  * pmap_growkernel:		[ INTERFACE ]
   3524  *
   3525  *	Grow the kernel address space.  This is a hint from the
   3526  *	upper layer to pre-allocate more kernel PT pages.
   3527  */
   3528 vaddr_t
   3529 pmap_growkernel(vaddr_t maxkvaddr)
   3530 {
   3531 	struct pmap *pm;
   3532 	paddr_t ptaddr;
   3533 	pt_entry_t *l1pte, *l2pte, pte;
   3534 	pt_entry_t *lev1map;
   3535 	vaddr_t va;
   3536 	int l1idx;
   3537 
   3538 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
   3539 
   3540 	if (maxkvaddr <= virtual_end)
   3541 		goto out;		/* we are OK */
   3542 
   3543 	pmap_growkernel_evcnt.ev_count++;
   3544 
   3545 	va = virtual_end;
   3546 
   3547 	while (va < maxkvaddr) {
   3548 		/*
   3549 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
   3550 		 * allocate a new L2 PT page and insert it into the
   3551 		 * L1 map.
   3552 		 */
   3553 		l1pte = pmap_l1pte(kernel_lev1map, va);
   3554 		if (pmap_pte_v(l1pte) == 0) {
   3555 			if (!pmap_kptpage_alloc(&ptaddr))
   3556 				goto die;
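         			/*
         			 * Kernel PT pages are wired and, like other
         			 * kernel mappings, use PG_ASM so the resulting
         			 * TLB entries are valid for every ASN.
         			 */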
   3557 			pte = (atop(ptaddr) << PG_SHIFT) |
   3558 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
   3559 			*l1pte = pte;
   3560 
   3561 			l1idx = l1pte_index(va);
   3562 
    3563 			/*
         			 * Update all the user pmaps: their level 1 maps
         			 * carry a copy of the kernel entries (cf.
         			 * pmap_l1pt_ctor()) and must see the new L1 PTE.
         			 */
   3564 			mutex_enter(&pmap_all_pmaps_lock);
   3565 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
   3566 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
   3567 				/* Skip the kernel pmap. */
   3568 				if (pm == pmap_kernel())
   3569 					continue;
   3570 
   3571 				/*
   3572 				 * Any pmaps published on the global list
   3573 				 * should never be referencing kernel_lev1map.
   3574 				 */
   3575 				lev1map = pmap_lev1map(pm);
   3576 				KASSERT(lev1map != kernel_lev1map);
   3577 
   3578 				PMAP_LOCK(pm);
   3579 				lev1map[l1idx] = pte;
   3580 				PMAP_UNLOCK(pm);
   3581 			}
   3582 			mutex_exit(&pmap_all_pmaps_lock);
   3583 		}
   3584 
   3585 		/*
    3586 		 * We now have an L2 PT page; add the L3 PT page.
   3587 		 */
   3588 		l2pte = pmap_l2pte(kernel_lev1map, va, l1pte);
   3589 		KASSERT(pmap_pte_v(l2pte) == 0);
   3590 		if (!pmap_kptpage_alloc(&ptaddr))
   3591 			goto die;
   3592 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
   3593 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
   3594 		va += ALPHA_L2SEG_SIZE;
   3595 	}
   3596 
   3597 	/* Invalidate the L1 PT cache. */
   3598 	pool_cache_invalidate(&pmap_l1pt_cache);
   3599 
   3600 	virtual_end = va;
   3601 
   3602  out:
   3603 	rw_exit(&pmap_growkernel_lock);
   3604 
   3605 	return (virtual_end);
   3606 
   3607  die:
   3608 	panic("pmap_growkernel: out of memory");
   3609 }
   3610 
   3611 /*
   3612  * pmap_l1pt_ctor:
   3613  *
   3614  *	Pool cache constructor for L1 PT pages.
   3615  *
   3616  *	Note: The growkernel lock is held across allocations
   3617  *	from our pool_cache, so we don't need to acquire it
   3618  *	ourselves.
   3619  */
   3620 static int
   3621 pmap_l1pt_ctor(void *arg, void *object, int flags)
   3622 {
   3623 	pt_entry_t *l1pt = object, pte;
   3624 	int i;
   3625 
   3626 	/*
   3627 	 * Initialize the new level 1 table by zeroing the
   3628 	 * user portion and copying the kernel mappings into
   3629 	 * the kernel portion.
   3630 	 */
   3631 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
   3632 		l1pt[i] = 0;
   3633 
   3634 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
   3635 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
   3636 		l1pt[i] = kernel_lev1map[i];
   3637 
   3638 	/*
   3639 	 * Now, map the new virtual page table.  NOTE: NO ASM!
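         	 * (The VPT mapping points at this pmap's own level 1 table and
         	 * so is per-address-space; if it were PG_ASM it would survive
         	 * ASN invalidation and leak into other address spaces.)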
   3640 	 */
   3641 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
   3642 	    PG_V | PG_KRE | PG_KWE;
   3643 	l1pt[l1pte_index(VPTBASE)] = pte;
   3644 
   3645 	return (0);
   3646 }
   3647 
   3648 /*
   3649  * pmap_l1pt_alloc:
   3650  *
   3651  *	Page allocator for L1 PT pages.
   3652  */
   3653 static void *
   3654 pmap_l1pt_alloc(struct pool *pp, int flags)
   3655 {
   3656 	/*
   3657 	 * Attempt to allocate a free page.
   3658 	 */
   3659 	struct vm_page * const pg = pmap_physpage_alloc(PGU_L1PT);
   3660 	if (__predict_false(pg == NULL)) {
   3661 		return NULL;
   3662 	}
   3663 	return (void *)ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(pg));
   3664 }
   3665 
   3666 /*
   3667  * pmap_l1pt_free:
   3668  *
   3669  *	Page freer for L1 PT pages.
   3670  */
   3671 static void
   3672 pmap_l1pt_free(struct pool *pp, void *v)
   3673 {
   3674 
   3675 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
   3676 }
   3677 
   3678 /*
   3679  * pmap_ptpage_alloc:
   3680  *
   3681  *	Allocate a level 2 or level 3 page table page for a user
   3682  *	pmap, and initialize the PTE that references it.
   3683  *
   3684  *	Note: the pmap must already be locked.
   3685  */
   3686 static int
   3687 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t * const pte, int const usage)
   3688 {
   3689 	/*
   3690 	 * Allocate the page table page.
   3691 	 */
   3692 	struct vm_page * const pg = pmap_physpage_alloc(usage);
   3693 	if (__predict_false(pg == NULL)) {
   3694 		return ENOMEM;
   3695 	}
   3696 
   3697 	LIST_INSERT_HEAD(&pmap->pm_ptpages, pg, pageq.list);
   3698 
   3699 	/*
   3700 	 * Initialize the referencing PTE.
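         	 * The new PT page is mapped kernel read/write (so it can be
         	 * reached through the virtual page table) and marked wired.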
   3701 	 */
   3702 	const pt_entry_t npte = ((VM_PAGE_TO_PHYS(pg) >> PGSHIFT) << PG_SHIFT) |
   3703 	    PG_V | PG_KRE | PG_KWE | PG_WIRED;
   3704 
   3705 	atomic_store_relaxed(pte, npte);
   3706 
   3707 	return (0);
   3708 }
   3709 
   3710 /*
   3711  * pmap_ptpage_free:
   3712  *
   3713  *	Free the level 2 or level 3 page table page referenced
    3714  *	by the provided PTE.
   3715  *
   3716  *	Note: the pmap must already be locked.
   3717  */
   3718 static void
   3719 pmap_ptpage_free(pmap_t pmap, pt_entry_t * const pte,
   3720     struct pmap_tlb_context * const tlbctx)
   3721 {
   3722 
   3723 	/*
   3724 	 * Extract the physical address of the page from the PTE
   3725 	 * and clear the entry.
   3726 	 */
   3727 	const paddr_t ptpa = pmap_pte_pa(pte);
   3728 	atomic_store_relaxed(pte, PG_NV);
   3729 
   3730 	struct vm_page * const pg = PHYS_TO_VM_PAGE(ptpa);
   3731 	KASSERT(pg != NULL);
   3732 
   3733 	KASSERT(PHYSPAGE_REFCNT(pg) == 0);
   3734 #ifdef DEBUG
   3735 	pmap_zero_page(ptpa);
   3736 #endif
   3737 
   3738 	LIST_REMOVE(pg, pageq.list);
   3739 	LIST_INSERT_HEAD(&tlbctx->t_freeptq, pg, pageq.list);
   3740 }
   3741 
   3742 /*
   3743  * pmap_l3pt_delref:
   3744  *
   3745  *	Delete a reference on a level 3 PT page.  If the reference drops
   3746  *	to zero, free it.
   3747  *
   3748  *	Note: the pmap must already be locked.
   3749  */
   3750 static void
   3751 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte,
   3752     struct pmap_tlb_context * const tlbctx)
   3753 {
   3754 	pt_entry_t *l1pte, *l2pte;
   3755 	pt_entry_t * const lev1map = pmap_lev1map(pmap);
   3756 
   3757 	l1pte = pmap_l1pte(lev1map, va);
   3758 	l2pte = pmap_l2pte(lev1map, va, l1pte);
   3759 
   3760 #ifdef DIAGNOSTIC
   3761 	if (pmap == pmap_kernel())
   3762 		panic("pmap_l3pt_delref: kernel pmap");
   3763 #endif
   3764 
   3765 	if (pmap_physpage_delref(l3pte) == 0) {
   3766 		/*
   3767 		 * No more mappings; we can free the level 3 table.
   3768 		 */
   3769 #ifdef DEBUG
   3770 		if (pmapdebug & PDB_PTPAGE)
   3771 			printf("pmap_l3pt_delref: freeing level 3 table at "
   3772 			    "0x%lx\n", pmap_pte_pa(l2pte));
   3773 #endif
   3774 		/*
    3775 		 * A NULL tlbctx is allowed only when the caller knows the
    3776 		 * last reference won't be dropped; here it must be valid.
   3777 		 */
   3778 		KASSERT(tlbctx != NULL);
   3779 		pmap_ptpage_free(pmap, l2pte, tlbctx);
   3780 
   3781 		/*
   3782 		 * We've freed a level 3 table, so we must invalidate
   3783 		 * any now-stale TLB entries for the corresponding VPT
   3784 		 * VA range.  Easiest way to guarantee this is to hit
   3785 		 * all of the user TLB entries.
   3786 		 */
   3787 		pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx);
   3788 
   3789 		/*
   3790 		 * We've freed a level 3 table, so delete the reference
   3791 		 * on the level 2 table.
   3792 		 */
   3793 		pmap_l2pt_delref(pmap, l1pte, l2pte, tlbctx);
   3794 	}
   3795 }
   3796 
   3797 /*
   3798  * pmap_l2pt_delref:
   3799  *
   3800  *	Delete a reference on a level 2 PT page.  If the reference drops
   3801  *	to zero, free it.
   3802  *
   3803  *	Note: the pmap must already be locked.
   3804  */
   3805 static void
   3806 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
   3807     struct pmap_tlb_context * const tlbctx)
   3808 {
   3809 
   3810 #ifdef DIAGNOSTIC
   3811 	if (pmap == pmap_kernel())
   3812 		panic("pmap_l2pt_delref: kernel pmap");
   3813 #endif
   3814 
   3815 	if (pmap_physpage_delref(l2pte) == 0) {
   3816 		/*
   3817 		 * No more mappings in this segment; we can free the
   3818 		 * level 2 table.
   3819 		 */
   3820 #ifdef DEBUG
   3821 		if (pmapdebug & PDB_PTPAGE)
   3822 			printf("pmap_l2pt_delref: freeing level 2 table at "
   3823 			    "0x%lx\n", pmap_pte_pa(l1pte));
   3824 #endif
   3825 		/*
    3826 		 * A NULL tlbctx is allowed only when the caller knows the
    3827 		 * last reference won't be dropped; here it must be valid.
   3828 		 */
   3829 		KASSERT(tlbctx != NULL);
   3830 		pmap_ptpage_free(pmap, l1pte, tlbctx);
   3831 
   3832 		/*
   3833 		 * We've freed a level 2 table, so we must invalidate
   3834 		 * any now-stale TLB entries for the corresponding VPT
   3835 		 * VA range.  Easiest way to guarantee this is to hit
   3836 		 * all of the user TLB entries.
   3837 		 */
   3838 		pmap_tlb_shootdown_all_user(pmap, PG_V, tlbctx);
   3839 
   3840 		/*
   3841 		 * We've freed a level 2 table, so delete the reference
   3842 		 * on the level 1 table.
   3843 		 */
   3844 		pmap_l1pt_delref(pmap, l1pte);
   3845 	}
   3846 }
   3847 
   3848 /*
   3849  * pmap_l1pt_delref:
   3850  *
   3851  *	Delete a reference on a level 1 PT page.
   3852  */
   3853 static void
   3854 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte)
   3855 {
   3856 
   3857 	KASSERT(pmap != pmap_kernel());
   3858 
   3859 	(void)pmap_physpage_delref(l1pte);
   3860 }
   3861 
   3862 /******************** Address Space Number management ********************/
   3863 
   3864 /*
   3865  * pmap_asn_alloc:
   3866  *
   3867  *	Allocate and assign an ASN to the specified pmap.
   3868  *
   3869  *	Note: the pmap must already be locked.  This may be called from
   3870  *	an interprocessor interrupt, and in that case, the sender of
   3871  *	the IPI has the pmap lock.
   3872  */
   3873 static u_int
   3874 pmap_asn_alloc(pmap_t const pmap, struct cpu_info * const ci)
   3875 {
   3876 
   3877 #ifdef DEBUG
   3878 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
   3879 		printf("pmap_asn_alloc(%p)\n", pmap);
   3880 #endif
   3881 
   3882 	KASSERT(pmap != pmap_kernel());
   3883 	KASSERT(pmap->pm_percpu[ci->ci_cpuid].pmc_lev1map != kernel_lev1map);
   3884 	KASSERT(kpreempt_disabled());
   3885 
   3886 	/* No work to do if the CPU does not implement ASNs. */
   3887 	if (pmap_max_asn == 0)
   3888 		return 0;
   3889 
   3890 	struct pmap_percpu * const pmc = &pmap->pm_percpu[ci->ci_cpuid];
   3891 
   3892 	/*
   3893 	 * Hopefully, we can continue using the one we have...
   3894 	 *
   3895 	 * N.B. the generation check will fail the first time
   3896 	 * any pmap is activated on a given CPU, because we start
   3897 	 * the generation counter at 1, but initialize pmaps with
   3898 	 * 0; this forces the first ASN allocation to occur.
   3899 	 */
   3900 	if (pmc->pmc_asngen == ci->ci_asn_gen) {
   3901 #ifdef DEBUG
   3902 		if (pmapdebug & PDB_ASN)
   3903 			printf("pmap_asn_alloc: same generation, keeping %u\n",
   3904 			    pmc->pmc_asn);
   3905 #endif
   3906 		TLB_COUNT(asn_reuse);
   3907 		return pmc->pmc_asn;
   3908 	}
   3909 
   3910 	/*
   3911 	 * Need to assign a new ASN.  Grab the next one, incrementing
   3912 	 * the generation number if we have to.
   3913 	 */
   3914 	if (ci->ci_next_asn > pmap_max_asn) {
   3915 		/*
   3916 		 * Invalidate all non-PG_ASM TLB entries and the
   3917 		 * I-cache, and bump the generation number.
   3918 		 */
   3919 		ALPHA_TBIAP();
   3920 		alpha_pal_imb();
   3921 
   3922 		ci->ci_next_asn = PMAP_ASN_FIRST_USER;
   3923 		ci->ci_asn_gen++;
   3924 		TLB_COUNT(asn_newgen);
   3925 
   3926 		/*
   3927 		 * Make sure the generation number doesn't wrap.  We could
   3928 		 * handle this scenario by traversing all of the pmaps,
   3929 		 * and invalidating the generation number on those which
   3930 		 * are not currently in use by this processor.
   3931 		 *
    3932 		 * However... considering that we're using an unsigned 64-bit
    3933 		 * integer for generation numbers, we won't wrap for
    3934 		 * approximately 75 billion years on a 128-ASN CPU
    3935 		 * (assuming 1000 switch operations per second).
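         		 * (Roughly: 2^64 generations x ~128 ASN assignments per
         		 * generation at 1000 per second is about 2.3e18 seconds,
         		 * i.e. on the order of 74 billion years.)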
   3936 		 *
   3937 		 * So, we don't bother.
   3938 		 */
   3939 		KASSERT(ci->ci_asn_gen != PMAP_ASNGEN_INVALID);
   3940 #ifdef DEBUG
   3941 		if (pmapdebug & PDB_ASN)
   3942 			printf("pmap_asn_alloc: generation bumped to %lu\n",
   3943 			    ci->ci_asn_gen);
   3944 #endif
   3945 	}
   3946 
   3947 	/*
   3948 	 * Assign the new ASN and validate the generation number.
   3949 	 */
   3950 	pmc->pmc_asn = ci->ci_next_asn++;
   3951 	pmc->pmc_asngen = ci->ci_asn_gen;
   3952 	TLB_COUNT(asn_assign);
   3953 
   3954 	/*
   3955 	 * We have a new ASN, so we can skip any pending I-stream sync
   3956 	 * on the way back out to user space.
   3957 	 */
   3958 	pmc->pmc_needisync = 0;
   3959 
   3960 #ifdef DEBUG
   3961 	if (pmapdebug & PDB_ASN)
   3962 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
   3963 		    pmc->pmc_asn, pmap);
   3964 #endif
   3965 	return pmc->pmc_asn;
   3966 }
   3967