      1 /*	$NetBSD: pmap.c,v 1.319 2024/02/09 22:08:33 andvar Exp $	*/
      2 /*
      3  *
      4  * Copyright (C) 1996-1999 Eduardo Horvath.
      5  * All rights reserved.
      6  *
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
     15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
     18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     24  * SUCH DAMAGE.
     25  *
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.319 2024/02/09 22:08:33 andvar Exp $");
     30 
     31 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
     32 #define	HWREF
     33 
     34 #include "opt_ddb.h"
     35 #include "opt_multiprocessor.h"
     36 #include "opt_modular.h"
     37 
     38 #include <sys/param.h>
     39 #include <sys/queue.h>
     40 #include <sys/systm.h>
     41 #include <sys/msgbuf.h>
     42 #include <sys/pool.h>
     43 #include <sys/exec.h>
     44 #include <sys/core.h>
     45 #include <sys/kcore.h>
     46 #include <sys/proc.h>
     47 #include <sys/atomic.h>
     48 #include <sys/cpu.h>
     49 
     50 #include <sys/exec_aout.h>	/* for MID_* */
     51 #include <sys/reboot.h>
     52 
     53 #include <uvm/uvm.h>
     54 
     55 #include <machine/pcb.h>
     56 #include <machine/sparc64.h>
     57 #include <machine/ctlreg.h>
     58 #include <machine/promlib.h>
     59 #include <machine/kcore.h>
     60 #include <machine/bootinfo.h>
     61 #ifdef SUN4V
     62 #include <machine/hypervisor.h>
     63 #endif
     64 #include <machine/mdesc.h>
     65 
     66 #include <sparc64/sparc64/cache.h>
     67 
     68 #ifdef DDB
     69 #include <machine/db_machdep.h>
     70 #include <ddb/db_command.h>
     71 #include <ddb/db_sym.h>
     72 #include <ddb/db_variables.h>
     73 #include <ddb/db_extern.h>
     74 #include <ddb/db_access.h>
     75 #include <ddb/db_output.h>
     76 #else
     77 #define Debugger()
     78 #define db_printf	printf
     79 #endif
     80 
     81 #define	MEG		(1<<20) /* 1MB */
     82 #define	KB		(1<<10)	/* 1KB */
     83 
     84 paddr_t cpu0paddr;		/* contiguous phys memory preallocated for cpus */
     85 
     86 /* These routines are in assembly to allow access thru physical mappings */
     87 extern int64_t pseg_get_real(struct pmap *, vaddr_t);
     88 extern int pseg_set_real(struct pmap *, vaddr_t, int64_t, paddr_t);
     89 
     90 /*
     91  * Diatribe on ref/mod counting:
     92  *
     93  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
     94  * in the pv_entry structure for each page.  (We could bypass this for the
     95  * vm_page, but that's a long story....)
     96  *
     97  * This architecture has nice, fast traps with lots of space for software bits
     98  * in the TTE.  To accelerate ref/mod counts we make use of these features.
     99  *
    100  * When we map a page initially, we place a TTE in the page table.  It's
    101  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
    102  * writable we set the TLB_REAL_W bit for the trap handler.
    103  *
    104  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
    105  * bit in the appropriate TTE in the page table.  Whenever we take a protection
    106  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
    107  * bits to enable writing and mark the page as modified.
    108  *
    109  * This means that we may have ref/mod information all over the place.  The
    110  * pmap routines must traverse the page tables of all pmaps with a given page
    111  * and collect/clear all the ref/mod information and copy it into the pv_entry.
    112  */
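/*
 * Illustrative sketch, not part of the build: the write-fault promotion
 * described above, written in C rather than in the trap handler's
 * assembly.  The helper name is hypothetical; the flag names are the TTE
 * bits the comment refers to.
 */
#if 0
static inline int64_t
tte_sketch_promote_write(int64_t data)
{
	/* A protection fault on a page that is not really writable ... */
	if ((data & TLB_REAL_W) == 0)
		return data;		/* ... is a genuine fault. */
	/* Otherwise enable hardware writes and record the modification. */
	return data | TLB_W | TLB_MOD;
}
#endif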
    113 
    114 #ifdef	NO_VCACHE
    115 #define	FORCE_ALIAS	1
    116 #else
    117 #define FORCE_ALIAS	0
    118 #endif
    119 
    120 #define	PV_ALIAS	0x1LL
    121 #define PV_REF		0x2LL
    122 #define PV_MOD		0x4LL
    123 #define PV_NVC		0x8LL
    124 #define PV_NC		0x10LL
    125 #define PV_WE		0x20LL	/* Debug -- this page was writable sometime */
    126 #define PV_MASK		(0x03fLL)
    127 #define PV_VAMASK	(~(PAGE_SIZE - 1))
    128 #define PV_MATCH(pv,va)	(!(((pv)->pv_va ^ (va)) & PV_VAMASK))
    129 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \
    130 					(((pv)->pv_va) & PV_MASK)))
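/*
 * Sketch (hypothetical helper, not compiled): pv_va packs the PV_* flag
 * bits above into the low bits of the page-aligned VA, so lookups compare
 * only the VA part and flag updates leave the VA part alone.
 */
#if 0
static inline bool
pv_sketch_is_referenced(pv_entry_t pv, vaddr_t va)
{
	/* Same mapping (VA part only), and the reference bit set? */
	return PV_MATCH(pv, va) && (pv->pv_va & PV_REF) != 0;
}
#endif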
    131 
    132 struct pool_cache pmap_cache;
    133 struct pool_cache pmap_pv_cache;
    134 
    135 pv_entry_t	pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *);
    136 void	pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *,
    137 			   pv_entry_t *);
    138 void	pmap_page_cache(struct pmap *, paddr_t, int);
    139 
    140 /*
    141  * First and last managed physical addresses.
    142  * XXX only used for dumping the system.
    143  */
    144 paddr_t	vm_first_phys, vm_num_phys;
    145 
    146 /*
    147  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
    148  */
    149 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
    150 #define TSBENTS (512<<tsbsize)
    151 #define	TSBSIZE	(TSBENTS * 16)
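/*
 * Worked example of the sizing above (each TSB entry is a 16 byte
 * tag/data pair): tsbsize 0 gives 512 entries = 8KB per TSB, tsbsize 1
 * gives 1024 entries = 16KB, and tsbsize 2 (chosen in pmap_bootstrap for
 * machines with 512MB of RAM or more) gives 2048 entries = 32KB.
 */
#if 0
__CTASSERT((512 << 2) * 16 == 32 * 1024);	/* the tsbsize == 2 case */
#endif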
    152 
    153 static struct pmap kernel_pmap_;
    154 struct pmap *const kernel_pmap_ptr = &kernel_pmap_;
    155 
    156 static int ctx_alloc(struct pmap *);
    157 static bool pmap_is_referenced_locked(struct vm_page *);
    158 
    159 static void ctx_free(struct pmap *, struct cpu_info *);
    160 
    161 /* set dmmu secondary context */
    162 static __inline void
    163 dmmu_set_secondary_context(uint ctx)
    164 {
    165 
    166 	if (!CPU_ISSUN4V)
    167 		__asm volatile(
    168 			"stxa %0,[%1]%2;	"
    169 			"membar #Sync		"
    170 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_DMMU)
    171 			: "memory");
    172 	else
    173 		__asm volatile(
    174 			"stxa %0,[%1]%2;	"
    175 			"membar #Sync		"
    176 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_MMU_CONTEXTID)
    177 			: "memory");
    178 }
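/*
 * Usage sketch: pmap_activate_pmap() below calls this with the context
 * number allocated for a user pmap, e.g.
 *
 *	dmmu_set_secondary_context(pmap_ctx(pmap));
 *
 * so that subsequent user-space accesses are translated in that context.
 */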
    179 
    180 /*
    181  * Check if any MMU has a non-zero context
    182  */
    183 static inline bool
    184 pmap_has_ctx(struct pmap *p)
    185 {
    186 	int i;
    187 
    188 	/* any context on any cpu? */
    189 	for (i = 0; i < sparc_ncpus; i++)
    190 		if (p->pm_ctx[i] > 0)
    191 			return true;
    192 
    193 	return false;
    194 }
    195 
    196 /*
    197  * Check if this pmap has a live mapping on some MMU.
    198  */
    199 static inline bool
    200 pmap_is_on_mmu(struct pmap *p)
    201 {
    202 	/* The kernel pmap is always on all MMUs */
    203 	if (p == pmap_kernel())
    204 		return true;
    205 
    206 	return pmap_has_ctx(p);
    207 }
    208 
    209 /*
    210  * Virtual and physical addresses of the start and end of kernel text
    211  * and data segments.
    212  */
    213 vaddr_t ktext;
    214 paddr_t ktextp;
    215 vaddr_t ektext;
    216 paddr_t ektextp;
    217 vaddr_t kdata;
    218 paddr_t kdatap;
    219 vaddr_t ekdata;
    220 paddr_t ekdatap;
    221 
    222 /*
    223  * Kernel 4MB pages.
    224  */
    225 extern struct tlb_entry *kernel_tlbs;
    226 extern int kernel_dtlb_slots, kernel_itlb_slots;
    227 
    228 static int npgs;
    229 
    230 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
    231 
    232 int phys_installed_size;		/* Installed physical memory */
    233 struct mem_region *phys_installed;
    234 
    235 paddr_t avail_start, avail_end;	/* These are used by ps & family */
    236 
    237 static int ptelookup_va(vaddr_t va);
    238 
    239 static inline void
    240 clrx(void *addr)
    241 {
    242 	__asm volatile("clrx [%0]" : : "r" (addr) : "memory");
    243 }
    244 
    245 static void
    246 tsb_invalidate(vaddr_t va, pmap_t pm)
    247 {
    248 	struct cpu_info *ci;
    249 	int ctx;
    250 	bool kpm = (pm == pmap_kernel());
    251 	int i;
    252 	int64_t tag;
    253 
    254 	i = ptelookup_va(va);
    255 #ifdef MULTIPROCESSOR
    256 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
    257 		if (!CPUSET_HAS(cpus_active, ci->ci_index))
    258 			continue;
    259 #else
    260 		ci = curcpu();
    261 #endif
    262 		ctx = pm->pm_ctx[ci->ci_index];
    263 		if (kpm || ctx > 0) {
    264 			tag = TSB_TAG(0, ctx, va);
    265 			if (ci->ci_tsb_dmmu[i].tag == tag) {
    266 				clrx(&ci->ci_tsb_dmmu[i].data);
    267 			}
    268 			if (ci->ci_tsb_immu[i].tag == tag) {
    269 				clrx(&ci->ci_tsb_immu[i].data);
    270 			}
    271 		}
    272 #ifdef MULTIPROCESSOR
    273 	}
    274 #endif
    275 }
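/*
 * Sketch (not compiled) of what the loop above does on one CPU: the TSBs
 * are direct mapped, so a VA always hashes to the slot returned by
 * ptelookup_va(), and only an entry whose (ctx, va) tag matches has its
 * data word cleared, forcing the MMU to miss and reload from the page
 * table.
 */
#if 0
	i = ptelookup_va(va);
	if (curcpu()->ci_tsb_dmmu[i].tag == TSB_TAG(0, 0, va))	/* kernel */
		clrx(&curcpu()->ci_tsb_dmmu[i].data);
#endif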
    276 
    277 struct prom_map *prom_map;
    278 int prom_map_size;
    279 
    280 #define	PDB_CREATE		0x000001
    281 #define	PDB_DESTROY		0x000002
    282 #define	PDB_REMOVE		0x000004
    283 #define	PDB_CHANGEPROT		0x000008
    284 #define	PDB_ENTER		0x000010
    285 #define	PDB_DEMAP		0x000020	/* used in locore */
    286 #define	PDB_REF			0x000040
    287 #define	PDB_COPY		0x000080
    288 #define	PDB_MMU_ALLOC		0x000100
    289 #define	PDB_MMU_STEAL		0x000200
    290 #define	PDB_CTX_ALLOC		0x000400
    291 #define	PDB_CTX_STEAL		0x000800
    292 #define	PDB_MMUREG_ALLOC	0x001000
    293 #define	PDB_MMUREG_STEAL	0x002000
    294 #define	PDB_CACHESTUFF		0x004000
    295 #define	PDB_ALIAS		0x008000
    296 #define PDB_EXTRACT		0x010000
    297 #define	PDB_BOOT		0x020000
    298 #define	PDB_BOOT1		0x040000
    299 #define	PDB_GROW		0x080000
    300 #define	PDB_CTX_FLUSHALL	0x100000
    301 #define	PDB_ACTIVATE		0x200000
    302 
    303 #if defined(DEBUG) && !defined(PMAP_DEBUG)
    304 #define PMAP_DEBUG
    305 #endif
    306 
    307 #ifdef PMAP_DEBUG
    308 struct {
    309 	int kernel;	/* entering kernel mapping */
    310 	int user;	/* entering user mapping */
    311 	int ptpneeded;	/* needed to allocate a PT page */
    312 	int pwchange;	/* no mapping change, just wiring or protection */
    313 	int wchange;	/* no mapping change, just wiring */
    314 	int mchange;	/* was mapped but mapping to different page */
    315 	int managed;	/* a managed page */
    316 	int firstpv;	/* first mapping for this PA */
    317 	int secondpv;	/* second mapping for this PA */
    318 	int ci;		/* cache inhibited */
    319 	int unmanaged;	/* not a managed page */
    320 	int flushes;	/* cache flushes */
    321 	int cachehit;	/* new entry forced valid entry out */
    322 } enter_stats;
    323 struct {
    324 	int calls;
    325 	int removes;
    326 	int flushes;
    327 	int tflushes;	/* TLB flushes */
    328 	int pidflushes;	/* HW pid stolen */
    329 	int pvfirst;
    330 	int pvsearch;
    331 } remove_stats;
    332 #define	ENTER_STAT(x)	do { enter_stats.x ++; } while (0)
    333 #define	REMOVE_STAT(x)	do { remove_stats.x ++; } while (0)
    334 
    335 int	pmapdebug = 0;
    336 //int	pmapdebug = 0 | PDB_CTX_ALLOC | PDB_ACTIVATE;
    337 /* Number of H/W pages stolen for page tables */
    338 int	pmap_pages_stolen = 0;
    339 
    340 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
    341 #define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
    342 #else
    343 #define	ENTER_STAT(x)	do { /* nothing */ } while (0)
    344 #define	REMOVE_STAT(x)	do { /* nothing */ } while (0)
    345 #define	BDPRINTF(n, f)
    346 #define	DPRINTF(n, f)
    347 #define pmapdebug 0
    348 #endif
    349 
    350 #define pv_check()
    351 
    352 static int pmap_get_page(paddr_t *);
    353 static void pmap_free_page(paddr_t, sparc64_cpuset_t);
    354 static void pmap_free_page_noflush(paddr_t);
    355 
    356 /*
    357  * Global pmap locks.
    358  */
    359 static kmutex_t pmap_lock;
    360 static bool lock_available = false;
    361 
    362 /*
    363  * Support for big page sizes.  This maps the page size to the
    364  * page bits.  That is: these are the bits between 8K pages and
    365  * larger page sizes that cause aliasing.
    366  */
    367 #define PSMAP_ENTRY(MASK, CODE)	{ .mask = MASK, .code = CODE }
    368 struct page_size_map page_size_map[] = {
    369 #ifdef DEBUG
    370 	PSMAP_ENTRY(0, PGSZ_8K & 0),	/* Disable large pages */
    371 #endif
    372 	PSMAP_ENTRY((4 * 1024 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_4M),
    373 	PSMAP_ENTRY((512 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_512K),
    374 	PSMAP_ENTRY((64 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_64K),
    375 	PSMAP_ENTRY((8 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_8K),
    376 	PSMAP_ENTRY(0, 0),
    377 };
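/*
 * Sketch (hypothetical helper, not compiled): how the table above is
 * scanned, e.g. when entering the PROM mappings in pmap_bootstrap().  Walk
 * from the largest page size down and take the first entry whose alignment
 * mask is satisfied by both VA and PA/TTE and which fits in the mapping;
 * the first zero-mask entry (plain 8K pages) terminates the scan.
 */
#if 0
static inline const struct page_size_map *
psm_sketch_pick(vaddr_t va, paddr_t pa, vsize_t len)
{
	const struct page_size_map *psm;

	for (psm = page_size_map; psm->mask != 0; psm++)
		if (((va | pa) & psm->mask) == 0 && psm->mask < len)
			break;
	return psm;
}
#endif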
    378 
    379 /*
    380  * This probably shouldn't be necessary, but it stops USIII machines from
    381  * breaking in general, and not just for MULTIPROCESSOR.
    382  */
    383 #define USE_LOCKSAFE_PSEG_GETSET
    384 #if defined(USE_LOCKSAFE_PSEG_GETSET)
    385 
    386 static kmutex_t pseg_lock;
    387 
    388 static __inline__ int64_t
    389 pseg_get_locksafe(struct pmap *pm, vaddr_t va)
    390 {
    391 	int64_t rv;
    392 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
    393 
    394 	if (__predict_true(took_lock))
    395 		mutex_enter(&pseg_lock);
    396 	rv = pseg_get_real(pm, va);
    397 	if (__predict_true(took_lock))
    398 		mutex_exit(&pseg_lock);
    399 	return rv;
    400 }
    401 
    402 static __inline__ int
    403 pseg_set_locksafe(struct pmap *pm, vaddr_t va, int64_t data, paddr_t ptp)
    404 {
    405 	int rv;
    406 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
    407 
    408 	if (__predict_true(took_lock))
    409 		mutex_enter(&pseg_lock);
    410 	rv = pseg_set_real(pm, va, data, ptp);
    411 	if (__predict_true(took_lock))
    412 		mutex_exit(&pseg_lock);
    413 	return rv;
    414 }
    415 
    416 #define pseg_get(pm, va)		pseg_get_locksafe(pm, va)
    417 #define pseg_set(pm, va, data, ptp)	pseg_set_locksafe(pm, va, data, ptp)
    418 
    419 #else /* USE_LOCKSAFE_PSEG_GETSET */
    420 
    421 #define pseg_get(pm, va)		pseg_get_real(pm, va)
    422 #define pseg_set(pm, va, data, ptp)	pseg_set_real(pm, va, data, ptp)
    423 
    424 #endif /* USE_LOCKSAFE_PSEG_GETSET */
    425 
    426 /*
    427  * Enter a TTE into the kernel pmap only.  Don't do anything else.
    428  *
    429  * Use only during bootstrapping since it does no locking and
    430  * can lose ref/mod info!!!!
    431  *
    432  */
    433 static void pmap_enter_kpage(vaddr_t va, int64_t data)
    434 {
    435 	paddr_t newp;
    436 
    437 	newp = 0UL;
    438 	while (pseg_set(pmap_kernel(), va, data, newp) & 1) {
    439 		if (!pmap_get_page(&newp)) {
    440 			prom_printf("pmap_enter_kpage: out of pages\n");
    441 			panic("pmap_enter_kpage");
    442 		}
    443 
    444 		ENTER_STAT(ptpneeded);
    445 		BDPRINTF(PDB_BOOT1,
    446 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\n",
    447 			  pmap_kernel(), va, (long)data, (long)newp));
    448 		if (pmapdebug & PDB_BOOT1)
    449 		{int i; for (i=0; i<140000000; i++) ;}
    450 	}
    451 }
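/*
 * Usage sketch: pmap_bootstrap() below calls this to wire early mappings,
 * e.g. for the message buffer:
 *
 *	data = TSB_DATA(0, PGSZ_8K, phys_msgbuf, 1, 1, 1, FORCE_ALIAS,
 *		1, 0, 0);
 *	pmap_enter_kpage(va, data);
 *
 * An odd return from pseg_set() means a page table page is needed, which
 * is why the loop above allocates one with pmap_get_page() and retries.
 */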
    452 
    453 /*
    454  * Check the bootargs to see if we need to enable bootdebug.
    455  */
    456 #ifdef DEBUG
    457 static void pmap_bootdebug(void)
    458 {
    459 	const char *cp = prom_getbootargs();
    460 
    461 	for (;;)
    462 		switch (*++cp) {
    463 		case '\0':
    464 			return;
    465 		case 'V':
    466 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
    467 			break;
    468 		case 'D':
    469 			pmapdebug |= PDB_BOOT1;
    470 			break;
    471 		}
    472 }
    473 #else
    474 #define pmap_bootdebug()	/* nothing */
    475 #endif
    476 
    477 
    478 /*
    479  * Calculate the correct number of page colors to use.  This should be the
    480  * size of the E$/PAGE_SIZE.  However, different CPUs can have different sized
    481  * E$, so we need to take the GCM of the E$ size.
    482  */
    483 static int pmap_calculate_colors(void)
    484 {
    485 	int node;
    486 	int size, assoc, color, maxcolor = 1;
    487 
    488 	for (node = prom_firstchild(prom_findroot()); node != 0;
    489 	     node = prom_nextsibling(node)) {
    490 		char *name = prom_getpropstring(node, "device_type");
    491 		if (strcmp("cpu", name) != 0)
    492 			continue;
    493 
    494 		/* Found a CPU, get the E$ info. */
    495 		size = cpu_ecache_size(node);
    496 		if (size == 0) {
    497 			prom_printf("pmap_calculate_colors: node %x has "
    498 				"no ecache-size\n", node);
    499 			/* If we can't get the E$ size, skip the node */
    500 			continue;
    501 		}
    502 
    503 		assoc = cpu_ecache_associativity(node);
    504 		color = size/assoc/PAGE_SIZE;
    505 		if (color > maxcolor)
    506 			maxcolor = color;
    507 	}
    508 	return (maxcolor);
    509 }
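/*
 * Worked example with hypothetical numbers: a CPU reporting a 4MB
 * direct-mapped E$ with 8KB pages gives 4MB / 1 / 8KB = 512 colors; a
 * second CPU with a 1MB 4-way E$ gives 32, so maxcolor stays at 512.
 */
#if 0
	color = (4 * 1024 * 1024) / 1 / (8 * 1024);	/* 512 */
#endif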
    510 
    511 static void pmap_alloc_bootargs(void)
    512 {
    513 	char *v;
    514 
    515 	v = OF_claim(NULL, 2*PAGE_SIZE, PAGE_SIZE);
    516 	if ((v == NULL) || (v == (void*)-1))
    517 		panic("Can't claim two pages of memory.");
    518 
    519 	memset(v, 0, 2*PAGE_SIZE);
    520 
    521 	cpu_args = (struct cpu_bootargs*)v;
    522 }
    523 
    524 #if defined(MULTIPROCESSOR)
    525 static void pmap_mp_init(void);
    526 
    527 static void
    528 pmap_mp_init(void)
    529 {
    530 	pte_t *tp;
    531 	char *v;
    532 	int i;
    533 
    534 	extern void cpu_mp_startup(void);
    535 
    536 	if ((v = OF_claim(NULL, PAGE_SIZE, PAGE_SIZE)) == NULL) {
    537 		panic("pmap_mp_init: Cannot claim a page.");
    538 	}
    539 
    540 	memcpy(v, mp_tramp_code, mp_tramp_code_len);
    541 	*(u_long *)(v + mp_tramp_dtlb_slots) = kernel_dtlb_slots;
    542 	*(u_long *)(v + mp_tramp_itlb_slots) = kernel_itlb_slots;
    543 	*(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup;
    544 	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args;
    545 	tp = (pte_t *)(v + mp_tramp_code_len);
    546 	for (i = 0; i < kernel_dtlb_slots; i++) {
    547 		tp[i].tag  = kernel_tlbs[i].te_va;
    548 		tp[i].data = TSB_DATA(0,		/* g */
    549 				PGSZ_4M,		/* sz */
    550 				kernel_tlbs[i].te_pa,	/* pa */
    551 				1, /* priv */
    552 				0, /* write */
    553 				1, /* cache */
    554 				1, /* aliased */
    555 				1, /* valid */
    556 				0, /* ie */
    557 				0  /* wc */);
    558 		tp[i].data |= TLB_L | TLB_CV;
    559 
    560 		if (i >= kernel_itlb_slots) {
    561 			tp[i].data |= TLB_W;
    562 		} else {
    563 			if (CPU_ISSUN4V)
    564 				tp[i].data |= SUN4V_TLB_X;
    565 		}
    566 
    567 		DPRINTF(PDB_BOOT1, ("xtlb[%d]: Tag: %" PRIx64 " Data: %"
    568 				PRIx64 "\n", i, tp[i].tag, tp[i].data));
    569 	}
    570 
    571 	for (i = 0; i < PAGE_SIZE; i += sizeof(long))
    572 		sparc_flush_icache(v + i);
    573 
    574 	cpu_spinup_trampoline = (vaddr_t)v;
    575 }
    576 #else
    577 #define pmap_mp_init()	((void)0)
    578 #endif
    579 
    580 paddr_t pmap_kextract(vaddr_t va);
    581 
    582 paddr_t
    583 pmap_kextract(vaddr_t va)
    584 {
    585 	int i;
    586 	paddr_t paddr = (paddr_t)-1;
    587 
    588 	for (i = 0; i < kernel_dtlb_slots; i++) {
    589 		if ((va & ~PAGE_MASK_4M) == kernel_tlbs[i].te_va) {
    590 			paddr = kernel_tlbs[i].te_pa +
    591 				(paddr_t)(va & PAGE_MASK_4M);
    592 			break;
    593 		}
    594 	}
    595 
    596 	if (i == kernel_dtlb_slots) {
    597 		panic("pmap_kextract: Address %p is not from kernel space.\n"
    598 				"Data segment is too small?\n", (void*)va);
    599 	}
    600 
    601 	return (paddr);
    602 }
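/*
 * Usage sketch: pmap_bootstrap() uses this to find the physical addresses
 * behind the locked 4MB kernel mappings, e.g.
 *
 *	ktextp = pmap_kextract(ktext);
 *	kdatap = pmap_kextract(kdata);
 *
 * Only VAs covered by kernel_tlbs[] can be translated here; anything else
 * panics.
 */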
    603 
    604 /*
    605  * Bootstrap kernel allocator; it allocates from the unused space in the
    606  * 4MB kernel data segment, which means that:
    607  *
    608  * - Access to allocated memory will never generate a trap
    609  * - Allocated chunks are never reclaimed or freed
    610  * - Allocation calls do not change PROM memlists
    611  */
    612 static struct mem_region kdata_mem_pool;
    613 
    614 static void
    615 kdata_alloc_init(vaddr_t va_start, vaddr_t va_end)
    616 {
    617 	vsize_t va_size = va_end - va_start;
    618 
    619 	kdata_mem_pool.start = va_start;
    620 	kdata_mem_pool.size  = va_size;
    621 
    622 	BDPRINTF(PDB_BOOT, ("kdata_alloc_init(): %d bytes @%p.\n", va_size,
    623 				va_start));
    624 }
    625 
    626 static vaddr_t
    627 kdata_alloc(vsize_t size, vsize_t align)
    628 {
    629 	vaddr_t va;
    630 	vsize_t asize;
    631 
    632 	asize = roundup(kdata_mem_pool.start, align) - kdata_mem_pool.start;
    633 
    634 	kdata_mem_pool.start += asize;
    635 	kdata_mem_pool.size  -= asize;
    636 
    637 	if (kdata_mem_pool.size < size) {
    638 		panic("kdata_alloc(): Data segment is too small.\n");
    639 	}
    640 
    641 	va = kdata_mem_pool.start;
    642 	kdata_mem_pool.start += size;
    643 	kdata_mem_pool.size  -= size;
    644 
    645 	BDPRINTF(PDB_BOOT, ("kdata_alloc(): Allocated %d@%p, %d free.\n",
    646 				size, (void*)va, kdata_mem_pool.size));
    647 
    648 	return (va);
    649 }
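/*
 * Usage sketch: this bump allocator hands out pieces of the otherwise
 * unused tail of the locked 4MB kernel data segment, e.g. the per-CPU
 * TSBs in cpu_pmap_prepare():
 *
 *	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
 *
 * Nothing allocated this way is ever freed; whatever is left over is
 * reported at the end of pmap_bootstrap().
 */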
    650 
    651 /*
    652  * Unified routine for reading PROM properties.
    653  */
    654 static void
    655 pmap_read_memlist(const char *device, const char *property, void **ml,
    656 		  int *ml_size, vaddr_t (* ml_alloc)(vsize_t, vsize_t))
    657 {
    658 	void *va;
    659 	int size, handle;
    660 
    661 	if ( (handle = prom_finddevice(device)) == 0) {
    662 		prom_printf("pmap_read_memlist(): No %s device found.\n",
    663 				device);
    664 		prom_halt();
    665 	}
    666 	if ( (size = OF_getproplen(handle, property)) < 0) {
    667 		prom_printf("pmap_read_memlist(): %s/%s has no length.\n",
    668 				device, property);
    669 		prom_halt();
    670 	}
    671 	if ( (va = (void*)(* ml_alloc)(size, sizeof(uint64_t))) == NULL) {
    672 		prom_printf("pmap_read_memlist(): Cannot allocate memlist.\n");
    673 		prom_halt();
    674 	}
    675 	if (OF_getprop(handle, property, va, size) <= 0) {
    676 		prom_printf("pmap_read_memlist(): Cannot read %s/%s.\n",
    677 				device, property);
    678 		prom_halt();
    679 	}
    680 
    681 	*ml = va;
    682 	*ml_size = size;
    683 }
    684 
    685 /*
    686  * This is called during bootstrap, before the system is really initialized.
    687  *
    688  * It's called with the start and end virtual addresses of the kernel.  We
    689  * bootstrap the pmap allocator now.  We will allocate the basic structures we
    690  * need to bootstrap the VM system here: the page frame tables, the TSB, and
    691  * the free memory lists.
    692  *
    693  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
    694  * separating the kernel text and data segments we really would need to
    695  * provide the start and end of each segment.  But we can't.  The rodata
    696  * segment is attached to the end of the kernel segment and has nothing to
    697  * delimit its end.  We could still pass in the beginning of the kernel and
    698  * the beginning and end of the data segment but we could also just as easily
    699  * calculate that all in here.
    700  *
    701  * To handle the kernel text, we need to do a reverse mapping of the start of
    702  * the kernel, then traverse the free memory lists to find out how big it is.
    703  */
    704 
    705 void
    706 pmap_bootstrap(u_long kernelstart, u_long kernelend)
    707 {
    708 #ifdef MODULAR
    709 	extern vaddr_t module_start, module_end;
    710 #endif
    711 	extern char etext[], data_start[];	/* start of data segment */
    712 	extern int msgbufmapped;
    713 	struct mem_region *mp, *mp1, *avail, *orig;
    714 	int i, j, pcnt, msgbufsiz;
    715 	size_t s, sz;
    716 	int64_t data;
    717 	vaddr_t va, intstk;
    718 	uint64_t phys_msgbuf;
    719 	paddr_t newp = 0;
    720 
    721 	void *prom_memlist;
    722 	int prom_memlist_size;
    723 
    724 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\n"));
    725 
    726 	/* XXX - incomplete spinup code for SUN4V */
    727 	if (CPU_ISSUN4V)
    728 		boothowto |= RB_MD1;
    729 
    730 	cache_setup_funcs();
    731 
    732 	/*
    733 	 * Calculate kernel size.
    734 	 */
    735 	ktext   = kernelstart;
    736 	ktextp  = pmap_kextract(ktext);
    737 	ektext  = roundup((vaddr_t)etext, PAGE_SIZE_4M);
    738 	ektextp = roundup(pmap_kextract((vaddr_t)etext), PAGE_SIZE_4M);
    739 
    740 	kdata   = (vaddr_t)data_start;
    741 	kdatap  = pmap_kextract(kdata);
    742 	ekdata  = roundup(kernelend, PAGE_SIZE_4M);
    743 	ekdatap = roundup(pmap_kextract(kernelend), PAGE_SIZE_4M);
    744 
    745 	BDPRINTF(PDB_BOOT, ("Virtual layout: text %lx-%lx, data %lx-%lx.\n",
    746 				ktext, ektext, kdata, ekdata));
    747 	BDPRINTF(PDB_BOOT, ("Physical layout: text %lx-%lx, data %lx-%lx.\n",
    748 				ktextp, ektextp, kdatap, ekdatap));
    749 
    750 	/* Initialize bootstrap allocator. */
    751 	kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata);
    752 
    753 	/* make sure we have access to the mdesc data on SUN4V machines */
    754 	if (CPU_ISSUN4V) {
    755 		vaddr_t m_va;
    756 		psize_t m_len;
    757 		paddr_t m_pa;
    758 
    759 		m_len = mdesc_get_len();
    760 		m_va = kdata_alloc(m_len, 16);
    761 		m_pa = pmap_kextract(m_va);
    762 		mdesc_init(m_va, m_pa, m_len);
    763 	}
    764 
    765 	pmap_bootdebug();
    766 	pmap_alloc_bootargs();
    767 	pmap_mp_init();
    768 
    769 	/*
    770 	 * set machine page size
    771 	 */
    772 	uvmexp.pagesize = NBPG;
    773 	uvmexp.ncolors = pmap_calculate_colors();
    774 	uvm_md_init();
    775 
    776 	/*
    777 	 * Get hold of the message buffer.
    778 	 */
    779 	msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA;
    780 	msgbufsiz = MSGBUFSIZE;
    781 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\n",
    782 			    (long)msgbufp, (long)msgbufsiz));
    783 	if ((long)msgbufp !=
    784 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
    785 		prom_printf(
    786 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\n",
    787 		    (void *)msgbufp, (long)phys_msgbuf);
    788 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
    789 	BDPRINTF(PDB_BOOT,
    790 		("We should have the memory at %lx, let's map it in\n",
    791 			phys_msgbuf));
    792 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
    793 			  -1/* sunos does this */) == -1) {
    794 		prom_printf("Failed to map msgbuf\n");
    795 	} else {
    796 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\n",
    797 			(void *)msgbufp));
    798 	}
    799 	msgbufmapped = 1;	/* enable message buffer */
    800 	initmsgbuf((void *)msgbufp, msgbufsiz);
    801 
    802 	/*
    803 	 * Find out how much RAM we have installed.
    804 	 */
    805 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\n"));
    806 	pmap_read_memlist("/memory", "reg", &prom_memlist, &prom_memlist_size,
    807 			kdata_alloc);
    808 	phys_installed = prom_memlist;
    809 	phys_installed_size = prom_memlist_size / sizeof(*phys_installed);
    810 
    811 	if (pmapdebug & PDB_BOOT1) {
    812 		/* print out mem list */
    813 		prom_printf("Installed physical memory:\n");
    814 		for (i = 0; i < phys_installed_size; i++) {
    815 			prom_printf("memlist start %lx size %lx\n",
    816 					(u_long)phys_installed[i].start,
    817 					(u_long)phys_installed[i].size);
    818 		}
    819 	}
    820 
    821 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
    822 	for (i = 0; i < phys_installed_size; i++)
    823 		physmem += btoc(phys_installed[i].size);
    824 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\n",
    825 			     (int)physmem, (int)physmem));
    826 
    827 	/*
    828 	 * Calculate approx TSB size.  This probably needs tweaking.
    829 	 */
    830 	if (physmem < btoc(64 * 1024 * 1024))
    831 		tsbsize = 0;
    832 	else if (physmem < btoc(512 * 1024 * 1024))
    833 		tsbsize = 1;
    834 	else
    835 		tsbsize = 2;
    836 
    837 	/*
    838 	 * Save the prom translations
    839 	 */
    840 	pmap_read_memlist("/virtual-memory", "translations", &prom_memlist,
    841 			&prom_memlist_size, kdata_alloc);
    842 	prom_map = prom_memlist;
    843 	prom_map_size = prom_memlist_size / sizeof(struct prom_map);
    844 
    845 	if (pmapdebug & PDB_BOOT) {
    846 		/* print out mem list */
    847 		prom_printf("Prom xlations:\n");
    848 		for (i = 0; i < prom_map_size; i++) {
    849 			prom_printf("start %016lx size %016lx tte %016lx\n",
    850 				    (u_long)prom_map[i].vstart,
    851 				    (u_long)prom_map[i].vsize,
    852 				    (u_long)prom_map[i].tte);
    853 		}
    854 		prom_printf("End of prom xlations\n");
    855 	}
    856 
    857 	/*
    858 	 * Here's a quick in-lined reverse bubble sort.  It sorts the PROM
    859 	 * translations by descending virtual address.
    860 	 */
    861 	for (i = 0; i < prom_map_size; i++) {
    862 		for (j = i; j < prom_map_size; j++) {
    863 			if (prom_map[j].vstart > prom_map[i].vstart) {
    864 				struct prom_map tmp;
    865 
    866 				tmp = prom_map[i];
    867 				prom_map[i] = prom_map[j];
    868 				prom_map[j] = tmp;
    869 			}
    870 		}
    871 	}
    872 	if (pmapdebug & PDB_BOOT) {
    873 		/* print out mem list */
    874 		prom_printf("Prom xlations:\n");
    875 		for (i = 0; i < prom_map_size; i++) {
    876 			prom_printf("start %016lx size %016lx tte %016lx\n",
    877 				    (u_long)prom_map[i].vstart,
    878 				    (u_long)prom_map[i].vsize,
    879 				    (u_long)prom_map[i].tte);
    880 		}
    881 		prom_printf("End of prom xlations\n");
    882 	}
    883 
    884 	/*
    885 	 * Allocate ncpu * 64KB of pages for the cpu_info & stack structures now.
    886 	 */
    887 	cpu0paddr = prom_alloc_phys(8 * PAGE_SIZE * sparc_ncpus, 8 * PAGE_SIZE);
    888 	if (cpu0paddr == 0) {
    889 		prom_printf("Cannot allocate cpu_infos\n");
    890 		prom_halt();
    891 	}
    892 
    893 	/*
    894 	 * Now the kernel text segment is in its final location we can try to
    895 	 * find out how much memory really is free.
    896 	 */
    897 	pmap_read_memlist("/memory", "available", &prom_memlist,
    898 			&prom_memlist_size, kdata_alloc);
    899 	orig = prom_memlist;
    900 	sz  = prom_memlist_size;
    901 	pcnt = prom_memlist_size / sizeof(*orig);
    902 
    903 	BDPRINTF(PDB_BOOT1, ("Available physical memory:\n"));
    904 	avail = (struct mem_region*)kdata_alloc(sz, sizeof(uint64_t));
    905 	for (i = 0; i < pcnt; i++) {
    906 		avail[i] = orig[i];
    907 		BDPRINTF(PDB_BOOT1, ("memlist start %lx size %lx\n",
    908 					(u_long)orig[i].start,
    909 					(u_long)orig[i].size));
    910 	}
    911 	BDPRINTF(PDB_BOOT1, ("End of available physical memory\n"));
    912 
    913 	BDPRINTF(PDB_BOOT, ("ktext %08lx[%08lx] - %08lx[%08lx] : "
    914 				"kdata %08lx[%08lx] - %08lx[%08lx]\n",
    915 				(u_long)ktext, (u_long)ktextp,
    916 				(u_long)ektext, (u_long)ektextp,
    917 				(u_long)kdata, (u_long)kdatap,
    918 				(u_long)ekdata, (u_long)ekdatap));
    919 	if (pmapdebug & PDB_BOOT1) {
    920 		/* print out mem list */
    921 		prom_printf("Available %lx physical memory before cleanup:\n",
    922 			    (u_long)avail);
    923 		for (i = 0; i < pcnt; i++) {
    924 			prom_printf("memlist start %lx size %lx\n",
    925 				    (u_long)avail[i].start,
    926 				    (u_long)avail[i].size);
    927 		}
    928 		prom_printf("End of available physical memory before cleanup\n");
    929 		prom_printf("kernel physical text size %08lx - %08lx\n",
    930 			    (u_long)ktextp, (u_long)ektextp);
    931 		prom_printf("kernel physical data size %08lx - %08lx\n",
    932 			    (u_long)kdatap, (u_long)ekdatap);
    933 	}
    934 
    935 	/*
    936 	 * Here's another quick in-lined bubble sort.
    937 	 */
    938 	for (i = 0; i < pcnt; i++) {
    939 		for (j = i; j < pcnt; j++) {
    940 			if (avail[j].start < avail[i].start) {
    941 				struct mem_region tmp;
    942 				tmp = avail[i];
    943 				avail[i] = avail[j];
    944 				avail[j] = tmp;
    945 			}
    946 		}
    947 	}
    948 
    949 	/* Throw away page zero if we have it. */
    950 	if (avail->start == 0) {
    951 		avail->start += PAGE_SIZE;
    952 		avail->size -= PAGE_SIZE;
    953 	}
    954 
    955 	/*
    956 	 * Now we need to remove the area we valloc'ed from the available
    957 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
    958 	 */
    959 	npgs = 0;
    960 	for (mp = avail, i = 0; i < pcnt; i++, mp = &avail[i]) {
    961 		/*
    962 		 * Now page align the start of the region.
    963 		 */
    964 		s = mp->start % PAGE_SIZE;
    965 		if (mp->size >= s) {
    966 			mp->size -= s;
    967 			mp->start += s;
    968 		}
    969 		/*
    970 		 * And now align the size of the region.
    971 		 */
    972 		mp->size -= mp->size % PAGE_SIZE;
    973 		/*
    974 		 * Check whether some memory is left here.
    975 		 */
    976 		if (mp->size == 0) {
    977 			memcpy(mp, mp + 1,
    978 			      (pcnt - (mp - avail)) * sizeof *mp);
    979 			pcnt--;
    980 			mp--;
    981 			continue;
    982 		}
    983 		s = mp->start;
    984 		sz = mp->size;
    985 		npgs += btoc(sz);
    986 		for (mp1 = avail; mp1 < mp; mp1++)
    987 			if (s < mp1->start)
    988 				break;
    989 		if (mp1 < mp) {
    990 			memcpy(mp1 + 1, mp1, (char *)mp - (char *)mp1);
    991 			mp1->start = s;
    992 			mp1->size = sz;
    993 		}
    994 #ifdef DEBUG
    995 /* Clear all memory we give to the VM system.  I want to make sure
    996  * the PROM isn't using it for something, so this should break the PROM.
    997  */
    998 
    999 /* Calling pmap_zero_page() at this point also hangs some machines
   1000  * so don't do it at all. -- pk 26/02/2002
   1001  */
   1002 #if 0
   1003 		{
   1004 			paddr_t p;
   1005 			for (p = mp->start; p < mp->start+mp->size;
   1006 			     p += PAGE_SIZE)
   1007 				pmap_zero_page(p);
   1008 		}
   1009 #endif
   1010 #endif /* DEBUG */
   1011 		/*
   1012 		 * In the future we should be able to specify both allocated
   1013 		 * and free.
   1014 		 */
   1015 		BDPRINTF(PDB_BOOT1, ("uvm_page_physload(%lx, %lx)\n",
   1016 					(long)mp->start,
   1017 					(long)(mp->start + mp->size)));
   1018 		uvm_page_physload(
   1019 			atop(mp->start),
   1020 			atop(mp->start+mp->size),
   1021 			atop(mp->start),
   1022 			atop(mp->start+mp->size),
   1023 			VM_FREELIST_DEFAULT);
   1024 	}
   1025 
   1026 	if (pmapdebug & PDB_BOOT) {
   1027 		/* print out mem list */
   1028 		prom_printf("Available physical memory after cleanup:\n");
   1029 		for (i = 0; i < pcnt; i++) {
   1030 			prom_printf("avail start %lx size %lx\n",
   1031 				    (long)avail[i].start, (long)avail[i].size);
   1032 		}
   1033 		prom_printf("End of available physical memory after cleanup\n");
   1034 	}
   1035 
   1036 	/*
   1037 	 * Allocate and clear out pmap_kernel()->pm_segs[]
   1038 	 */
   1039 	pmap_kernel()->pm_refs = 1;
   1040 	memset(&pmap_kernel()->pm_ctx, 0, sizeof(pmap_kernel()->pm_ctx));
   1041 
   1042 	/* Throw away page zero */
   1043 	do {
   1044 		pmap_get_page(&newp);
   1045 	} while (!newp);
   1046 	pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp;
   1047 	pmap_kernel()->pm_physaddr = newp;
   1048 
   1049 	/*
   1050 	 * finish filling out kernel pmap.
   1051 	 */
   1052 
   1053 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\n",
   1054 	    (long)pmap_kernel()->pm_physaddr));
   1055 	/*
   1056 	 * Tell pmap about our msgbuf -- hope this works already
   1057 	 */
   1058 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\n"));
   1059 	if (pmapdebug & PDB_BOOT1)
   1060 		consinit();
   1061 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\n"));
   1062 	/* it's not safe to call pmap_enter so we need to do this ourselves */
   1063 	va = (vaddr_t)msgbufp;
   1064 	while (msgbufsiz) {
   1065 		data = TSB_DATA(0 /* global */,
   1066 			PGSZ_8K,
   1067 			phys_msgbuf,
   1068 			1 /* priv */,
   1069 			1 /* Write */,
   1070 			1 /* Cacheable */,
   1071 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
   1072 			1 /* valid */,
   1073 			0 /* IE */,
   1074 			0 /* wc */);
   1075 		pmap_enter_kpage(va, data);
   1076 		va += PAGE_SIZE;
   1077 		msgbufsiz -= PAGE_SIZE;
   1078 		phys_msgbuf += PAGE_SIZE;
   1079 	}
   1080 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\n"));
   1081 
   1082 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\n"));
   1083 	for (i = 0; i < prom_map_size; i++)
   1084 		if (prom_map[i].vstart && ((prom_map[i].vstart >> 32) == 0))
   1085 			for (j = 0; j < prom_map[i].vsize; j += PAGE_SIZE) {
   1086 				int k;
   1087 
   1088 				for (k = 0; page_size_map[k].mask; k++) {
   1089 					if (((prom_map[i].vstart |
   1090 					      prom_map[i].tte) &
   1091 					      page_size_map[k].mask) == 0 &&
   1092 					      page_size_map[k].mask <
   1093 					      prom_map[i].vsize)
   1094 						break;
   1095 				}
   1096 				page_size_map[k].use++;
   1097 				/* Enter PROM map into pmap_kernel() */
   1098 				pmap_enter_kpage(prom_map[i].vstart + j,
   1099 					(prom_map[i].tte + j) | TLB_EXEC |
   1100 					page_size_map[k].code);
   1101 			}
   1102 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\n"));
   1103 
   1104 	/*
   1105 	 * Fix up start of kernel heap.
   1106 	 */
   1107 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
   1108 	/* Let's keep 1 page of redzone after the kernel */
   1109 	vmmap += PAGE_SIZE;
   1110 	{
   1111 		extern void main(void);
   1112 		vaddr_t u0va;
   1113 		paddr_t pa;
   1114 
   1115 		u0va = vmmap;
   1116 
   1117 		BDPRINTF(PDB_BOOT1,
   1118 			("Inserting lwp0 USPACE into pmap_kernel() at %p\n",
   1119 				vmmap));
   1120 
   1121 		while (vmmap < u0va + 2*USPACE) {
   1122 			int64_t data1;
   1123 
   1124 			if (!pmap_get_page(&pa))
   1125 				panic("pmap_bootstrap: no pages");
   1126 			prom_map_phys(pa, PAGE_SIZE, vmmap, -1);
   1127 			data1 = TSB_DATA(0 /* global */,
   1128 				PGSZ_8K,
   1129 				pa,
   1130 				1 /* priv */,
   1131 				1 /* Write */,
   1132 				1 /* Cacheable */,
   1133 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
   1134 				1 /* valid */,
   1135 				0 /* ei */,
   1136 				0 /* WC */);
   1137 			pmap_enter_kpage(vmmap, data1);
   1138 			vmmap += PAGE_SIZE;
   1139 		}
   1140 		BDPRINTF(PDB_BOOT1,
   1141 			 ("Done inserting stack 0 into pmap_kernel()\n"));
   1142 
   1143 		/* Now map in and initialize our cpu_info structure */
   1144 #ifdef DIAGNOSTIC
   1145 		vmmap += PAGE_SIZE; /* redzone -- XXXX do we need one? */
   1146 #endif
   1147 		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
   1148 			vmmap += PAGE_SIZE; /* Matchup virtual color for D$ */
   1149 		intstk = vmmap;
   1150 		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
   1151 
   1152 		BDPRINTF(PDB_BOOT1,
   1153 			("Inserting cpu_info into pmap_kernel() at %p\n",
   1154 				 cpus));
   1155 		/* Now map in all 8 pages of interrupt stack/cpu_info */
   1156 		pa = cpu0paddr;
   1157 		prom_map_phys(pa, 64*KB, vmmap, -1);
   1158 
   1159 		/*
   1160 		 * Also map it in as the interrupt stack.
   1161 		 * This lets the PROM see this if needed.
   1162 		 *
   1163 		 * XXXX locore.s does not flush these mappings
   1164 		 * before installing the locked TTE.
   1165 		 */
   1166 		prom_map_phys(pa, 64*KB, INTSTACK, -1);
   1167 		for (i = 0; i < 8; i++) {
   1168 			int64_t data1;
   1169 
   1170 			data1 = TSB_DATA(0 /* global */,
   1171 				PGSZ_8K,
   1172 				pa,
   1173 				1 /* priv */,
   1174 				1 /* Write */,
   1175 				1 /* Cacheable */,
   1176 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
   1177 				1 /* valid */,
   1178 				0 /* IE */,
   1179 				0 /* wc */);
   1180 			pmap_enter_kpage(vmmap, data1);
   1181 			vmmap += PAGE_SIZE;
   1182 			pa += PAGE_SIZE;
   1183 		}
   1184 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\n"));
   1185 
   1186 		/* Initialize our cpu_info structure */
   1187 		memset((void *)intstk, 0, 64 * KB);
   1188 		cpus->ci_self = cpus;
   1189 		cpus->ci_next = NULL;
   1190 		cpus->ci_curlwp = &lwp0;
   1191 		cpus->ci_flags = CPUF_PRIMARY;
   1192 		cpus->ci_cpuid = cpu_myid();
   1193 		cpus->ci_fplwp = NULL;
   1194 		cpus->ci_eintstack = NULL;
   1195 		cpus->ci_spinup = main; /* Call main when we're running. */
   1196 		cpus->ci_paddr = cpu0paddr;
   1197 		if (CPU_ISSUN4V) {
   1198 			cpus->ci_mmufsa = cpu0paddr;
   1199 			cpus->ci_tsb_desc = NULL;
   1200 		}
   1201 		cpus->ci_cpcb = (struct pcb *)u0va;
   1202 		cpus->ci_idepth = -1;
   1203 		memset(cpus->ci_intrpending, -1, sizeof(cpus->ci_intrpending));
   1204 
   1205 		uvm_lwp_setuarea(&lwp0, u0va);
   1206 		lwp0.l_md.md_tf = (struct trapframe64*)(u0va + USPACE
   1207 		    - sizeof(struct trapframe64));
   1208 
   1209 		cpu0paddr += 64 * KB;
   1210 
   1211 		CPUSET_CLEAR(cpus_active);
   1212 		CPUSET_ADD(cpus_active, 0);
   1213 
   1214 		cpu_pmap_prepare(cpus, true);
   1215 		cpu_pmap_init(cpus);
   1216 
   1217 		/* The rest will be done at CPU attach time. */
   1218 		BDPRINTF(PDB_BOOT1,
   1219 			 ("Done inserting cpu_info into pmap_kernel()\n"));
   1220 	}
   1221 
   1222 	vmmap = (vaddr_t)reserve_dumppages((void *)(u_long)vmmap);
   1223 
   1224 #ifdef MODULAR
   1225 	/*
   1226 	 * For 32bit kernels:
   1227 	 *   Reserve 16 MB of VA for module loading. Right now our full
   1228 	 *   GENERIC kernel is about 13 MB, so this looks good enough.
   1229 	 * For 64bit kernels:
   1230 	 *   We can use all the space left before the special addresses,
   1231 	 *   but leave 2 pages at vmmap alone (see pmap_virtual_space)
   1232 	 *   and another red zone page.
   1233 	 */
   1234 #ifdef __arch64__
   1235 	module_start = vmmap + 3*PAGE_SIZE;
   1236 	module_end = 0x08000000;	/* keep all modules within 2GB */
   1237 	KASSERT(module_end < KERNEND);	/* of kernel text */
   1238 #else
   1239 	module_start = vmmap;
   1240 	vmmap += 16 * 1024*1024;
   1241 	module_end = vmmap;
   1242 #endif
   1243 #endif
   1244 
   1245 	/*
   1246 	 * Set up bounds of allocatable memory for vmstat et al.
   1247 	 */
   1248 	avail_start = avail->start;
   1249 	for (mp = avail; mp->size; mp++)
   1250 		avail_end = mp->start+mp->size;
   1251 
   1252 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\n"));
   1253 
   1254 	BDPRINTF(PDB_BOOT, ("left kdata: %" PRId64 " @%" PRIx64 ".\n",
   1255 				kdata_mem_pool.size, kdata_mem_pool.start));
   1256 }
   1257 
   1258 /*
   1259  * Allocate TSBs for both mmus from the locked kernel data segment page.
   1260  * This is run before the cpu itself is activated (or by the first cpu
   1261  * itself)
   1262  */
   1263 void
   1264 cpu_pmap_prepare(struct cpu_info *ci, bool initial)
   1265 {
   1266 	/* allocate our TSBs */
   1267 	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
   1268 	ci->ci_tsb_immu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
   1269 	memset(ci->ci_tsb_dmmu, 0, TSBSIZE);
   1270 	memset(ci->ci_tsb_immu, 0, TSBSIZE);
   1271 	if (!initial) {
   1272 		KASSERT(ci != curcpu());
   1273 		/*
   1274 		 * Initially share ctxbusy with the boot cpu; the
   1275 		 * cpu will replace it as soon as it runs (and can
   1276 		 * probe the number of available contexts itself).
   1277 		 * Until then only context 0 (aka kernel) will be
   1278 		 * referenced anyway.
   1279 		 */
   1280 		ci->ci_numctx = curcpu()->ci_numctx;
   1281 		ci->ci_ctxbusy = curcpu()->ci_ctxbusy;
   1282 	}
   1283 
   1284 	if (CPU_ISSUN4V) {
   1285 		ci->ci_tsb_desc = (struct tsb_desc *)kdata_alloc(
   1286 			sizeof(struct tsb_desc), 16);
   1287 		memset(ci->ci_tsb_desc, 0, sizeof(struct tsb_desc));
   1288 		/* 8K page size used for TSB index computation */
   1289 		ci->ci_tsb_desc->td_idxpgsz = 0;
   1290 		ci->ci_tsb_desc->td_assoc = 1;
   1291 		ci->ci_tsb_desc->td_size = TSBENTS;
   1292 		ci->ci_tsb_desc->td_ctxidx = -1;
   1293 		ci->ci_tsb_desc->td_pgsz = 0xf;
   1294 		ci->ci_tsb_desc->td_pa = pmap_kextract((vaddr_t)ci->ci_tsb_dmmu);
   1295 		BDPRINTF(PDB_BOOT1, ("cpu %d: TSB descriptor allocated at %p "
   1296 		    "size %08x - td_pa at %p\n",
   1297 		    ci->ci_index, ci->ci_tsb_desc, sizeof(struct tsb_desc),
   1298 		    ci->ci_tsb_desc->td_pa));
   1299 	}
   1300 
   1301 	BDPRINTF(PDB_BOOT1, ("cpu %d: TSB allocated at %p/%p size %08x\n",
   1302 	    ci->ci_index, ci->ci_tsb_dmmu, ci->ci_tsb_immu, TSBSIZE));
   1303 }
   1304 
   1305 /*
   1306  * Initialize the per CPU parts for the cpu running this code.
   1307  */
   1308 void
   1309 cpu_pmap_init(struct cpu_info *ci)
   1310 {
   1311 	size_t ctxsize;
   1312 
   1313 	/*
   1314 	 * We delay initialising ci_ctx_lock here as LOCKDEBUG isn't
   1315 	 * running for cpu0 yet.
   1316 	 */
   1317 	ci->ci_pmap_next_ctx = 1;
   1318 	/* all SUN4U use 13 bit contexts - SUN4V use at least 13 bit contexts */
   1319 	ci->ci_numctx = 0x2000;
   1320 	ctxsize = sizeof(paddr_t)*ci->ci_numctx;
   1321 	ci->ci_ctxbusy = (paddr_t *)kdata_alloc(ctxsize, sizeof(uint64_t));
   1322 	memset(ci->ci_ctxbusy, 0, ctxsize);
   1323 	LIST_INIT(&ci->ci_pmap_ctxlist);
   1324 
   1325 	/* mark kernel context as busy */
   1326 	ci->ci_ctxbusy[0] = pmap_kernel()->pm_physaddr;
   1327 }
   1328 
   1329 /*
   1330  * Initialize anything else for pmap handling.
   1331  * Called during vm_init().
   1332  */
   1333 void
   1334 pmap_init(void)
   1335 {
   1336 	struct vm_page *pg;
   1337 	struct pglist pglist;
   1338 	uint64_t data;
   1339 	paddr_t pa;
   1340 	psize_t size;
   1341 	vaddr_t va;
   1342 
   1343 	BDPRINTF(PDB_BOOT1, ("pmap_init()\n"));
   1344 
   1345 	size = sizeof(struct pv_entry) * physmem;
   1346 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
   1347 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
   1348 		panic("pmap_init: no memory");
   1349 
   1350 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
   1351 	if (va == 0)
   1352 		panic("pmap_init: no memory");
   1353 
   1354 	/* Map the pages */
   1355 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
   1356 		pa = VM_PAGE_TO_PHYS(pg);
   1357 		pmap_zero_page(pa);
   1358 		data = TSB_DATA(0 /* global */,
   1359 			PGSZ_8K,
   1360 			pa,
   1361 			1 /* priv */,
   1362 			1 /* Write */,
   1363 			1 /* Cacheable */,
   1364 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
   1365 			1 /* valid */,
   1366 			0 /* IE */,
   1367 			0 /* wc */);
   1368 		pmap_enter_kpage(va, data);
   1369 		va += PAGE_SIZE;
   1370 	}
   1371 
   1372 	/*
   1373 	 * initialize the pmap pools.
   1374 	 */
   1375 	pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap),
   1376 	    SPARC64_BLOCK_SIZE, 0, 0, "pmappl", NULL, IPL_NONE, NULL, NULL,
   1377 	    NULL);
   1378 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
   1379 	    PR_LARGECACHE, "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL);
   1380 
   1381 	vm_first_phys = avail_start;
   1382 	vm_num_phys = avail_end - avail_start;
   1383 
   1384 	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
   1385 #if defined(USE_LOCKSAFE_PSEG_GETSET)
   1386 	mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
   1387 #endif
   1388 	lock_available = true;
   1389 }
   1390 
   1391 /*
   1392  * How much virtual space is available to the kernel?
   1393  */
   1394 static vaddr_t kbreak; /* End of kernel VA */
   1395 void
   1396 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
   1397 {
   1398 
   1399 	/*
   1400 	 * Reserve one segment for kernel virtual memory.
   1401 	 */
   1402 #ifdef __arch64__
   1403 	/*
   1404 	 * On 64 bit kernels, start it beyond firmware, so
   1405 	 * we are basically unrestricted.
   1406 	 */
   1407 	*start = kbreak = VM_KERNEL_MEM_VA_START;
   1408 	*end = VM_MAX_KERNEL_ADDRESS;
   1409 #else
   1410 	/*
   1411 	 * Reserve two pages for pmap_copy_page && /dev/mem, but otherwise
   1412 	 * end it beyound the iospace and other special fixed addresses.
   1413 	 * end it beyond the iospace and other special fixed addresses.
   1414 	*start = kbreak = (vaddr_t)(vmmap + 2*PAGE_SIZE);
   1415 	*end = VM_MAX_KERNEL_ADDRESS;
   1416 #endif
   1417 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\n", *start, *end));
   1418 }
   1419 
   1420 /*
   1421  * Preallocate kernel page tables to a specified VA.
   1422  * This simply loops through the first TTE for each
   1423  * page table from the beginning of the kernel pmap,
   1424  * reads the entry, and if the result is
   1425  * zero (either invalid entry or no page table) it stores
   1426  * a zero there, populating page tables in the process.
   1427  * This is not the most efficient technique, but I don't
   1428  * expect it to be called that often.
   1429  */
   1430 vaddr_t
   1431 pmap_growkernel(vaddr_t maxkvaddr)
   1432 {
   1433 	struct pmap *pm = pmap_kernel();
   1434 	paddr_t pa;
   1435 
   1436 	if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
   1437 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
   1438 		       (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
   1439 		return (kbreak);
   1440 	}
   1441 	DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
   1442 	/* Align with the start of a page table */
   1443 	for (kbreak &= ((~0ULL) << PDSHIFT); kbreak < maxkvaddr;
   1444 	     kbreak += (1 << PDSHIFT)) {
   1445 		if (pseg_get(pm, kbreak) & TLB_V)
   1446 			continue;
   1447 
   1448 		pa = 0;
   1449 		while (pseg_set(pm, kbreak, 0, pa) & 1) {
   1450 			DPRINTF(PDB_GROW,
   1451 			    ("pmap_growkernel: extending %lx\n", kbreak));
   1452 			pa = 0;
   1453 			if (!pmap_get_page(&pa))
   1454 				panic("pmap_growkernel: no pages");
   1455 			ENTER_STAT(ptpneeded);
   1456 		}
   1457 	}
   1458 	return (kbreak);
   1459 }
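/*
 * Usage note: this is called as the kernel map grows.  kbreak records how
 * far kernel page tables have been preallocated, so repeated calls only do
 * work for the newly covered range, one (1 << PDSHIFT) sized step at a
 * time.
 */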
   1460 
   1461 /*
   1462  * Create and return a physical map.
   1463  */
   1464 struct pmap *
   1465 pmap_create(void)
   1466 {
   1467 	struct pmap *pm;
   1468 
   1469 	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
   1470 
   1471 	pm = pool_cache_get(&pmap_cache, PR_WAITOK);
   1472 	memset(pm, 0, sizeof *pm);
   1473 	DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm));
   1474 
   1475 	pm->pm_refs = 1;
   1476 	TAILQ_INIT(&pm->pm_ptps);
   1477 	if (pm != pmap_kernel()) {
   1478 		while (!pmap_get_page(&pm->pm_physaddr)) {
   1479 			uvm_wait("pmap_create");
   1480 		}
   1481 		pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr;
   1482 	}
   1483 	DPRINTF(PDB_CREATE, ("pmap_create(%p): ctx %d\n", pm, pmap_ctx(pm)));
   1484 	return pm;
   1485 }
   1486 
   1487 /*
   1488  * Add a reference to the given pmap.
   1489  */
   1490 void
   1491 pmap_reference(struct pmap *pm)
   1492 {
   1493 
   1494 	atomic_inc_uint(&pm->pm_refs);
   1495 }
   1496 
   1497 /*
   1498  * Retire the given pmap from service.
   1499  * Should only be called if the map contains no valid mappings.
   1500  */
   1501 void
   1502 pmap_destroy(struct pmap *pm)
   1503 {
   1504 #ifdef MULTIPROCESSOR
   1505 	struct cpu_info *ci;
   1506 	sparc64_cpuset_t pmap_cpus_active;
   1507 #else
   1508 #define pmap_cpus_active 0
   1509 #endif
   1510 	struct vm_page *pg;
   1511 
   1512 	membar_release();
   1513 	if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) {
   1514 		return;
   1515 	}
   1516 	membar_acquire();
   1517 	DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
   1518 #ifdef MULTIPROCESSOR
   1519 	CPUSET_CLEAR(pmap_cpus_active);
   1520 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
   1521 		/* XXXMRG: Move the lock inside one or both tests? */
   1522 		mutex_enter(&ci->ci_ctx_lock);
   1523 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
   1524 			if (pm->pm_ctx[ci->ci_index] > 0) {
   1525 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
   1526 				ctx_free(pm, ci);
   1527 			}
   1528 		}
   1529 		mutex_exit(&ci->ci_ctx_lock);
   1530 	}
   1531 #else
   1532 	if (pmap_ctx(pm)) {
   1533 		mutex_enter(&curcpu()->ci_ctx_lock);
   1534 		ctx_free(pm, curcpu());
   1535 		mutex_exit(&curcpu()->ci_ctx_lock);
   1536 	}
   1537 #endif
   1538 
   1539 	/* we could be a little smarter and leave pages zeroed */
   1540 	while ((pg = TAILQ_FIRST(&pm->pm_ptps)) != NULL) {
   1541 		struct vm_page_md *md = VM_PAGE_TO_MD(pg);
   1542 
   1543 		TAILQ_REMOVE(&pm->pm_ptps, pg, pageq.queue);
   1544 		KASSERT(md->mdpg_pvh.pv_pmap == NULL);
   1545 		dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
   1546 		uvm_pagefree(pg);
   1547 	}
   1548 	pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
   1549 
   1550 	pool_cache_put(&pmap_cache, pm);
   1551 }
   1552 
   1553 /*
   1554  * Copy the range specified by src_addr/len
   1555  * from the source map to the range dst_addr/len
   1556  * in the destination map.
   1557  *
   1558  * This routine is only advisory and need not do anything.
   1559  */
   1560 void
   1561 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, vsize_t len, vaddr_t src_addr)
   1562 {
   1563 
   1564 	DPRINTF(PDB_CREATE, ("pmap_copy(%p, %p, %p, %lx, %p)\n",
   1565 			     dst_pmap, src_pmap, (void *)(u_long)dst_addr,
   1566 			     (u_long)len, (void *)(u_long)src_addr));
   1567 }
   1568 
   1569 /*
   1570  * Activate the address space for the specified process.  If the
   1571  * process is the current process, load the new MMU context.
   1572  */
   1573 void
   1574 pmap_activate(struct lwp *l)
   1575 {
   1576 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
   1577 
   1578 	if (pmap == pmap_kernel()) {
   1579 		return;
   1580 	}
   1581 
   1582 	/*
   1583 	 * This is essentially the same thing that happens in cpu_switchto()
   1584 	 * when the newly selected process is about to run, except that we
   1585 	 * have to make sure to clean the register windows before we set
   1586 	 * the new context.
   1587 	 */
   1588 
   1589 	if (l != curlwp) {
   1590 		return;
   1591 	}
   1592 	write_user_windows();
   1593 	pmap_activate_pmap(pmap);
   1594 }
   1595 
   1596 void
   1597 pmap_activate_pmap(struct pmap *pmap)
   1598 {
   1599 
   1600 	if (pmap_ctx(pmap) == 0) {
   1601 		(void) ctx_alloc(pmap);
   1602 	}
   1603 	DPRINTF(PDB_ACTIVATE,
   1604 		("%s: cpu%d activating ctx %d\n", __func__,
   1605 		 cpu_number(), pmap_ctx(pmap)));
   1606 	dmmu_set_secondary_context(pmap_ctx(pmap));
   1607 }
   1608 
   1609 /*
   1610  * Deactivate the address space of the specified process.
   1611  */
   1612 void
   1613 pmap_deactivate(struct lwp *l)
   1614 {
   1615 
   1616 	DPRINTF(PDB_ACTIVATE,
   1617 		("%s: cpu%d deactivating ctx %d\n", __func__,
   1618 		 cpu_number(), pmap_ctx(l->l_proc->p_vmspace->vm_map.pmap)));
   1619 }
   1620 
   1621 /*
   1622  * pmap_kenter_pa:		[ INTERFACE ]
   1623  *
   1624  *	Enter a va -> pa mapping into the kernel pmap without any
   1625  *	physical->virtual tracking.
   1626  *
   1627  *	Note: no locking is necessary in this function.
   1628  */
   1629 void
   1630 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
   1631 {
   1632 	pte_t tte;
   1633 	paddr_t ptp;
   1634 	struct pmap *pm = pmap_kernel();
   1635 	int i;
   1636 
   1637 	KASSERT(va < INTSTACK || va > EINTSTACK);
   1638 	KASSERT(va < kdata || va > ekdata);
   1639 
   1640 	/*
   1641 	 * Construct the TTE.
   1642 	 */
   1643 
   1644 	ENTER_STAT(unmanaged);
   1645 	if (pa & (PMAP_NVC|PMAP_NC)) {
   1646 		ENTER_STAT(ci);
   1647 	}
   1648 
   1649 	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
   1650 			    (VM_PROT_WRITE & prot),
   1651 			    !(pa & PMAP_NC), pa & (PMAP_NVC), 1,
   1652 			    pa & (PMAP_LITTLE), pa & PMAP_WC);
   1653 	/* We don't track mod/ref here. */
   1654 	if (prot & VM_PROT_WRITE)
   1655 		tte.data |= TLB_REAL_W|TLB_W;
   1656 	if (prot & VM_PROT_EXECUTE)
   1657 		tte.data |= TLB_EXEC;
   1658 	tte.data |= TLB_TSB_LOCK;	/* wired */
   1659 	ptp = 0;
   1660 
   1661  retry:
   1662 	i = pseg_set(pm, va, tte.data, ptp);
   1663 	if (i & 1) {
   1664 		KASSERT((i & 4) == 0);
   1665 		ptp = 0;
   1666 		if (!pmap_get_page(&ptp))
   1667 			panic("pmap_kenter_pa: no pages");
   1668 		ENTER_STAT(ptpneeded);
   1669 		goto retry;
   1670 	}
   1671 	if (ptp && i == 0) {
   1672 		/* We allocated a spare page but didn't use it.  Free it. */
   1673 		printf("pmap_kenter_pa: freeing unused page %llx\n",
   1674 		       (long long)ptp);
   1675 		pmap_free_page_noflush(ptp);
   1676 	}
   1677 #ifdef PMAP_DEBUG
   1678 	i = ptelookup_va(va);
   1679 	if (pmapdebug & PDB_ENTER)
   1680 		prom_printf("pmap_kenter_pa: va=%08x data=%08x:%08x "
   1681 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
   1682 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
   1683 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
   1684 		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x "
   1685 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
   1686 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
   1687 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data,
   1688 			i, &curcpu()->ci_tsb_dmmu[i]);
   1689 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
   1690 			va, (int)(tte.data>>32), (int)tte.data,	i,
   1691 			&curcpu()->ci_tsb_dmmu[i]);
   1692 	}
   1693 #endif
   1694 }
   1695 
   1696 /*
   1697  * pmap_kremove:		[ INTERFACE ]
   1698  *
   1699  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
   1700  *	for size bytes (assumed to be page rounded).
   1701  */
   1702 void
   1703 pmap_kremove(vaddr_t va, vsize_t size)
   1704 {
   1705 	struct pmap *pm = pmap_kernel();
   1706 	int64_t data;
   1707 	paddr_t pa;
   1708 	int rv;
   1709 	bool flush = FALSE;
   1710 
   1711 	KASSERT(va < INTSTACK || va > EINTSTACK);
   1712 	KASSERT(va < kdata || va > ekdata);
   1713 
   1714 	DPRINTF(PDB_DEMAP, ("pmap_kremove: start 0x%lx size %lx\n", va, size));
   1715 	for (; size >= PAGE_SIZE; va += PAGE_SIZE, size -= PAGE_SIZE) {
   1716 
   1717 #ifdef DIAGNOSTIC
   1718 		/*
   1719 		 * Is this part of the permanent 4MB mapping?
   1720 		 */
   1721 		if (va >= ktext && va < roundup(ekdata, 4*MEG))
   1722 			panic("pmap_kremove: va=%08x in locked TLB", (u_int)va);
   1723 #endif
   1724 
   1725 		data = pseg_get(pm, va);
   1726 		if ((data & TLB_V) == 0) {
   1727 			continue;
   1728 		}
   1729 
   1730 		flush = TRUE;
   1731 		pa = data & TLB_PA_MASK;
   1732 
   1733 		/*
   1734 		 * We need to flip the valid bit and
   1735 		 * clear the access statistics.
   1736 		 */
   1737 
   1738 		rv = pseg_set(pm, va, 0, 0);
   1739 		if (rv & 1)
   1740 			panic("pmap_kremove: pseg_set needs spare, rv=%d\n",
   1741 			    rv);
   1742 		DPRINTF(PDB_DEMAP, ("pmap_kremove: seg %x pdir %x pte %x\n",
   1743 		    (int)va_to_seg(va), (int)va_to_dir(va),
   1744 		    (int)va_to_pte(va)));
   1745 		REMOVE_STAT(removes);
   1746 
   1747 		tsb_invalidate(va, pm);
   1748 		REMOVE_STAT(tflushes);
   1749 
   1750 		/*
   1751 		 * Here we assume nothing can get into the TLB
   1752 		 * unless it has a PTE.
   1753 		 */
   1754 
   1755 		tlb_flush_pte(va, pm);
   1756 		dcache_flush_page_all(pa);
   1757 	}
   1758 	if (flush)
   1759 		REMOVE_STAT(flushes);
   1760 }
   1761 
   1762 /*
   1763  * Insert physical page at pa into the given pmap at virtual address va.
   1764  * Supports 64-bit pa so we can map I/O space.
   1765  */
   1766 
   1767 int
   1768 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
   1769 {
   1770 	pte_t tte;
   1771 	int64_t data;
   1772 	paddr_t opa = 0, ptp; /* XXX: gcc */
   1773 	pv_entry_t pvh, opv = NULL, npv;
   1774 	struct vm_page *pg, *opg, *ptpg;
   1775 	int s, i, uncached = 0, error = 0;
   1776 	int size = PGSZ_8K; /* PMAP_SZ_TO_TTE(pa); */
   1777 	bool wired = (flags & PMAP_WIRED) != 0;
   1778 	bool wasmapped = false;
   1779 	bool dopv = true;
   1780 
   1781 	/*
   1782 	 * Is this part of the permanent mappings?
   1783 	 */
   1784 	KASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
   1785 	KASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
   1786 
   1787 	/*
   1788 	 * Grab a spare PV.  Keep going even if this fails since we don't
   1789 	 * yet know if we will need it.
   1790 	 */
   1791 
   1792 	npv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
   1793 
   1794 	/*
	 * If a mapping at this address already exists, check if we're
	 * entering the same PA again.  If it's different, remove it.
   1797 	 */
   1798 
   1799 	mutex_enter(&pmap_lock);
   1800 	data = pseg_get(pm, va);
   1801 	if (data & TLB_V) {
		wasmapped = true;
   1803 		opa = data & TLB_PA_MASK;
   1804 		if (opa != pa) {
   1805 			opg = PHYS_TO_VM_PAGE(opa);
   1806 			if (opg != NULL) {
   1807 				opv = pmap_remove_pv(pm, va, opg);
   1808 			}
   1809 		}
   1810 	}
   1811 
   1812 	/*
   1813 	 * Construct the TTE.
   1814 	 */
   1815 	pg = PHYS_TO_VM_PAGE(pa);
   1816 	if (pg) {
   1817 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   1818 
   1819 		pvh = &md->mdpg_pvh;
   1820 		uncached = (pvh->pv_va & (PV_ALIAS|PV_NVC));
   1821 #ifdef DIAGNOSTIC
   1822 		if ((flags & VM_PROT_ALL) & ~prot)
   1823 			panic("pmap_enter: access_type exceeds prot");
   1824 #endif
   1825 		/*
   1826 		 * If we don't have the traphandler do it,
   1827 		 * set the ref/mod bits now.
   1828 		 */
   1829 		if (flags & VM_PROT_ALL)
   1830 			pvh->pv_va |= PV_REF;
   1831 		if (flags & VM_PROT_WRITE)
   1832 			pvh->pv_va |= PV_MOD;
   1833 
   1834 		/*
   1835 		 * make sure we have a pv entry ready if we need one.
   1836 		 */
   1837 		if (wasmapped && opa == pa) {
   1838 			dopv = false;
   1839 		} else if (npv == NULL) {
   1840 			npv = opv;
   1841 			opv = NULL;
   1842 			if (npv == NULL) {
   1843 				mutex_exit(&pmap_lock);
   1844 				error = ENOMEM;
   1845 				goto out;
   1846 			}
   1847 		}
   1848 		ENTER_STAT(managed);
   1849 	} else {
   1850 		ENTER_STAT(unmanaged);
   1851 		dopv = false;
   1852 	}
   1853 
   1854 #ifndef NO_VCACHE
   1855 	if (pa & PMAP_NVC)
   1856 #endif
   1857 		uncached = 1;
   1858 	if (uncached) {
   1859 		ENTER_STAT(ci);
   1860 	}
   1861 	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
   1862 		flags & VM_PROT_WRITE, !(pa & PMAP_NC),
   1863 		uncached, 1, pa & PMAP_LITTLE, pa & PMAP_WC);
   1864 #ifdef HWREF
   1865 	if (prot & VM_PROT_WRITE)
   1866 		tte.data |= TLB_REAL_W;
   1867 	if (prot & VM_PROT_EXECUTE)
   1868 		tte.data |= TLB_EXEC;
   1869 #else
   1870 	/* If it needs ref accounting do nothing. */
   1871 	if (!(flags & VM_PROT_READ)) {
   1872 		mutex_exit(&pmap_lock);
   1873 		goto out;
   1874 	}
   1875 #endif
   1876 	if (flags & VM_PROT_EXECUTE) {
   1877 		if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0)
   1878 			tte.data |= TLB_EXEC_ONLY|TLB_EXEC;
   1879 		else
   1880 			tte.data |= TLB_EXEC;
   1881 	}
   1882 	if (wired)
   1883 		tte.data |= TLB_TSB_LOCK;
   1884 	ptp = 0;
   1885 
   1886  retry:
   1887 	i = pseg_set(pm, va, tte.data, ptp);
	if (i == -2) {
		if (flags & PMAP_CANFAIL) {
			/* Don't leak pmap_lock or the spare pv entries. */
			mutex_exit(&pmap_lock);
			error = ENOMEM;
			goto out;
		}
		panic("pmap_enter: invalid VA (inside hole)");
	}
   1893 	if (i & 4) {
   1894 		/* ptp used as L3 */
   1895 		KASSERT(ptp != 0);
   1896 		KASSERT((i & 3) == 0);
   1897 		ptpg = PHYS_TO_VM_PAGE(ptp);
   1898 		if (ptpg) {
   1899 			ptpg->offset = (uint64_t)va & (0xfffffLL << 23);
   1900 			TAILQ_INSERT_TAIL(&pm->pm_ptps, ptpg, pageq.queue);
   1901 		} else {
   1902 			KASSERT(pm == pmap_kernel());
   1903 		}
   1904 	}
   1905 	if (i & 2) {
   1906 		/* ptp used as L2 */
   1907 		KASSERT(ptp != 0);
   1908 		KASSERT((i & 4) == 0);
   1909 		ptpg = PHYS_TO_VM_PAGE(ptp);
   1910 		if (ptpg) {
   1911 			ptpg->offset = (((uint64_t)va >> 43) & 0x3ffLL) << 13;
   1912 			TAILQ_INSERT_TAIL(&pm->pm_ptps, ptpg, pageq.queue);
   1913 		} else {
   1914 			KASSERT(pm == pmap_kernel());
   1915 		}
   1916 	}
   1917 	if (i & 1) {
   1918 		KASSERT((i & 4) == 0);
   1919 		ptp = 0;
   1920 		if (!pmap_get_page(&ptp)) {
   1921 			mutex_exit(&pmap_lock);
   1922 			if (flags & PMAP_CANFAIL) {
   1923 				error = ENOMEM;
   1924 				goto out;
   1925 			} else {
   1926 				panic("pmap_enter: no pages");
   1927 			}
   1928 		}
   1929 		ENTER_STAT(ptpneeded);
   1930 		goto retry;
   1931 	}
   1932 	if (ptp && i == 0) {
   1933 		/* We allocated a spare page but didn't use it.  Free it. */
   1934 		printf("pmap_enter: freeing unused page %llx\n",
   1935 		       (long long)ptp);
   1936 		pmap_free_page_noflush(ptp);
   1937 	}
   1938 	if (dopv) {
   1939 		pmap_enter_pv(pm, va, pa, pg, &npv);
   1940 	}
   1941 
   1942 	mutex_exit(&pmap_lock);
   1943 #ifdef PMAP_DEBUG
   1944 	i = ptelookup_va(va);
   1945 	if (pmapdebug & PDB_ENTER)
   1946 		prom_printf("pmap_enter: va=%08x data=%08x:%08x "
   1947 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
   1948 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
   1949 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
   1950 		prom_printf("pmap_enter: evicting entry tag=%x:%08x "
   1951 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
   1952 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
   1953 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, i,
   1954 			&curcpu()->ci_tsb_dmmu[i]);
   1955 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
   1956 			va, (int)(tte.data>>32), (int)tte.data, i,
   1957 			&curcpu()->ci_tsb_dmmu[i]);
   1958 	}
   1959 #endif
   1960 
   1961 	if (flags & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) {
   1962 
   1963 		/*
   1964 		 * preload the TSB with the new entry,
   1965 		 * since we're going to need it immediately anyway.
   1966 		 */
   1967 
   1968 		KASSERT(pmap_ctx(pm)>=0);
   1969 		i = ptelookup_va(va);
   1970 		tte.tag = TSB_TAG(0, pmap_ctx(pm), va);
   1971 		s = splhigh();
   1972 		if (wasmapped && pmap_is_on_mmu(pm)) {
   1973 			tsb_invalidate(va, pm);
   1974 		}
   1975 		if (flags & (VM_PROT_READ | VM_PROT_WRITE)) {
   1976 			curcpu()->ci_tsb_dmmu[i].tag = tte.tag;
   1977 			__asm volatile("" : : : "memory");
   1978 			curcpu()->ci_tsb_dmmu[i].data = tte.data;
   1979 		}
   1980 		if (flags & VM_PROT_EXECUTE) {
   1981 			curcpu()->ci_tsb_immu[i].tag = tte.tag;
   1982 			__asm volatile("" : : : "memory");
   1983 			curcpu()->ci_tsb_immu[i].data = tte.data;
   1984 		}
   1985 
   1986 		/*
   1987 		 * it's only necessary to flush the TLB if this page was
   1988 		 * previously mapped, but for some reason it's a lot faster
   1989 		 * for the fork+exit microbenchmark if we always do it.
   1990 		 */
   1991 
   1992 		KASSERT(pmap_ctx(pm)>=0);
   1993 #ifdef MULTIPROCESSOR
   1994 		if (wasmapped && pmap_is_on_mmu(pm))
   1995 			tlb_flush_pte(va, pm);
   1996 		else
   1997 			sp_tlb_flush_pte(va, pmap_ctx(pm));
   1998 #else
   1999 		tlb_flush_pte(va, pm);
   2000 #endif
   2001 		splx(s);
   2002 	} else if (wasmapped && pmap_is_on_mmu(pm)) {
   2003 		/* Force reload -- protections may be changed */
   2004 		KASSERT(pmap_ctx(pm)>=0);
   2005 		tsb_invalidate(va, pm);
   2006 		tlb_flush_pte(va, pm);
   2007 	}
   2008 
   2009 	/* We will let the fast mmu miss interrupt load the new translation */
   2010 	pv_check();
   2011  out:
   2012 	if (opv)
   2013 		pool_cache_put(&pmap_pv_cache, opv);
   2014 	if (npv)
   2015 		pool_cache_put(&pmap_pv_cache, npv);
   2016 
   2017 	return error;
   2018 }
   2019 
   2020 bool
   2021 pmap_remove_all(struct pmap *pm)
   2022 {
   2023 #ifdef MULTIPROCESSOR
   2024 	struct cpu_info *ci;
   2025 	sparc64_cpuset_t pmap_cpus_active;
   2026 #endif
   2027 
   2028 	if (pm == pmap_kernel()) {
   2029 		return false;
   2030 	}
   2031 	write_user_windows();
   2032 	pm->pm_refs = 0;
   2033 
   2034 	/*
   2035 	 * XXXMRG: pmap_destroy() does exactly the same dance here.
	 * Surely one of them isn't necessary?
   2037 	 */
   2038 #ifdef MULTIPROCESSOR
   2039 	CPUSET_CLEAR(pmap_cpus_active);
   2040 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
   2041 		/* XXXMRG: Move the lock inside one or both tests? */
   2042 		mutex_enter(&ci->ci_ctx_lock);
   2043 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
   2044 			if (pm->pm_ctx[ci->ci_index] > 0) {
   2045 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
   2046 				ctx_free(pm, ci);
   2047 			}
   2048 		}
   2049 		mutex_exit(&ci->ci_ctx_lock);
   2050 	}
   2051 #else
   2052 	if (pmap_ctx(pm)) {
   2053 		mutex_enter(&curcpu()->ci_ctx_lock);
   2054 		ctx_free(pm, curcpu());
   2055 		mutex_exit(&curcpu()->ci_ctx_lock);
   2056 	}
   2057 #endif
   2058 
   2059 	REMOVE_STAT(flushes);
   2060 	/*
   2061 	 * XXXMRG: couldn't we do something less severe here, and
   2062 	 * only flush the right context on each CPU?
   2063 	 */
   2064 	blast_dcache();
   2065 	return false;
   2066 }
   2067 
   2068 /*
   2069  * Remove the given range of mapping entries.
   2070  */
   2071 void
   2072 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
   2073 {
   2074 	int64_t data;
   2075 	paddr_t pa;
   2076 	struct vm_page *pg;
   2077 	pv_entry_t pv, freepv = NULL;
   2078 	int rv;
   2079 	bool flush = FALSE;
   2080 
   2081 	/*
	 * In here we should check each pseg and, if there are no more entries,
	 * free it.  It's just that linear scans of 8K pages get expensive.
   2084 	 */
   2085 
   2086 	KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
   2087 	KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
   2088 
   2089 	mutex_enter(&pmap_lock);
   2090 	DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
   2091 			     (void *)(u_long)va, (void *)(u_long)endva));
   2092 	REMOVE_STAT(calls);
   2093 
   2094 	/* Now do the real work */
   2095 	for (; va < endva; va += PAGE_SIZE) {
   2096 #ifdef DIAGNOSTIC
   2097 		/*
   2098 		 * Is this part of the permanent 4MB mapping?
   2099 		 */
   2100 		if (pm == pmap_kernel() && va >= ktext &&
   2101 			va < roundup(ekdata, 4*MEG))
   2102 			panic("pmap_remove: va=%08llx in locked TLB",
   2103 			      (long long)va);
   2104 #endif
   2105 
   2106 		data = pseg_get(pm, va);
   2107 		if ((data & TLB_V) == 0) {
   2108 			continue;
   2109 		}
   2110 
   2111 		flush = TRUE;
   2112 		/* First remove the pv entry, if there is one */
   2113 		pa = data & TLB_PA_MASK;
   2114 		pg = PHYS_TO_VM_PAGE(pa);
   2115 		if (pg) {
   2116 			pv = pmap_remove_pv(pm, va, pg);
   2117 			if (pv != NULL) {
   2118 				/* free it */
   2119 				pv->pv_next = freepv;
   2120 				freepv = pv;
   2121 			}
   2122 		}
   2123 
   2124 		/*
   2125 		 * We need to flip the valid bit and
   2126 		 * clear the access statistics.
   2127 		 */
   2128 
   2129 		rv = pseg_set(pm, va, 0, 0);
   2130 		if (rv & 1)
   2131 			panic("pmap_remove: pseg_set needed spare, rv=%d!\n",
   2132 			    rv);
   2133 
   2134 		DPRINTF(PDB_REMOVE, (" clearing seg %x pte %x\n",
   2135 				     (int)va_to_seg(va), (int)va_to_pte(va)));
   2136 		REMOVE_STAT(removes);
   2137 
   2138 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
   2139 			continue;
   2140 
   2141 		/*
		 * If the pmap is being torn down, don't bother flushing;
		 * we have already done so.
   2144 		 */
   2145 
   2146 		if (!pm->pm_refs)
   2147 			continue;
   2148 
   2149 		/*
   2150 		 * Here we assume nothing can get into the TLB
   2151 		 * unless it has a PTE.
   2152 		 */
   2153 
   2154 		KASSERT(pmap_ctx(pm)>=0);
   2155 		tsb_invalidate(va, pm);
   2156 		REMOVE_STAT(tflushes);
   2157 		tlb_flush_pte(va, pm);
   2158 		dcache_flush_page_all(pa);
   2159 	}
   2160 	if (flush && pm->pm_refs)
   2161 		REMOVE_STAT(flushes);
   2162 	DPRINTF(PDB_REMOVE, ("\n"));
   2163 	pv_check();
   2164 	mutex_exit(&pmap_lock);
   2165 
   2166 	/* Catch up on deferred frees. */
   2167 	for (; freepv != NULL; freepv = pv) {
   2168 		pv = freepv->pv_next;
   2169 		pool_cache_put(&pmap_pv_cache, freepv);
   2170 	}
   2171 }
   2172 
   2173 /*
   2174  * Change the protection on the specified range of this pmap.
   2175  */
   2176 void
   2177 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
   2178 {
   2179 	paddr_t pa;
   2180 	int64_t data;
   2181 	struct vm_page *pg;
   2182 	pv_entry_t pv;
   2183 	int rv;
   2184 
   2185 	KASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
   2186 	KASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
   2187 
   2188 	if (prot == VM_PROT_NONE) {
   2189 		pmap_remove(pm, sva, eva);
   2190 		return;
   2191 	}
   2192 
   2193 	sva = trunc_page(sva);
   2194 	mutex_enter(&pmap_lock);
   2195 	for (; sva < eva; sva += PAGE_SIZE) {
   2196 #ifdef PMAP_DEBUG
   2197 		/*
   2198 		 * Is this part of the permanent 4MB mapping?
   2199 		 */
   2200 		if (pm == pmap_kernel() && sva >= ktext &&
   2201 		    sva < roundup(ekdata, 4 * MEG)) {
   2202 			mutex_exit(&pmap_lock);
   2203 			prom_printf("pmap_protect: va=%08x in locked TLB\n",
   2204 			    sva);
   2205 			prom_abort();
   2206 			return;
   2207 		}
   2208 #endif
   2209 		DPRINTF(PDB_CHANGEPROT, ("pmap_protect: va %p\n",
   2210 		    (void *)(u_long)sva));
   2211 		data = pseg_get(pm, sva);
   2212 		if ((data & TLB_V) == 0) {
   2213 			continue;
   2214 		}
   2215 
   2216 		pa = data & TLB_PA_MASK;
   2217 		DPRINTF(PDB_CHANGEPROT|PDB_REF,
   2218 			("pmap_protect: va=%08x data=%08llx "
   2219 			 "seg=%08x pte=%08x\n",
   2220 			 (u_int)sva, (long long)pa, (int)va_to_seg(sva),
   2221 			 (int)va_to_pte(sva)));
   2222 
   2223 		pg = PHYS_TO_VM_PAGE(pa);
   2224 		if (pg) {
   2225 			struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2226 
   2227 			/* Save REF/MOD info */
   2228 			pv = &md->mdpg_pvh;
   2229 			if (data & TLB_ACCESS)
   2230 				pv->pv_va |= PV_REF;
   2231 			if (data & TLB_MODIFY)
   2232 				pv->pv_va |= PV_MOD;
   2233 		}
   2234 
   2235 		/* Just do the pmap and TSB, not the pv_list */
   2236 		if ((prot & VM_PROT_WRITE) == 0)
   2237 			data &= ~(TLB_W|TLB_REAL_W);
   2238 		if ((prot & VM_PROT_EXECUTE) == 0)
   2239 			data &= ~(TLB_EXEC);
   2240 
   2241 		rv = pseg_set(pm, sva, data, 0);
   2242 		if (rv & 1)
   2243 			panic("pmap_protect: pseg_set needs spare! rv=%d\n",
   2244 			    rv);
   2245 
   2246 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
   2247 			continue;
   2248 
   2249 		KASSERT(pmap_ctx(pm)>=0);
   2250 		tsb_invalidate(sva, pm);
   2251 		tlb_flush_pte(sva, pm);
   2252 	}
   2253 	pv_check();
   2254 	mutex_exit(&pmap_lock);
   2255 }
   2256 
   2257 /*
   2258  * Extract the physical page address associated
   2259  * with the given map/virtual_address pair.
   2260  */
   2261 bool
   2262 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
   2263 {
   2264 	paddr_t pa;
   2265 	int64_t data = 0;
   2266 
   2267 	if (pm == pmap_kernel() && va >= kdata && va < roundup(ekdata, 4*MEG)) {
   2268 		/* Need to deal w/locked TLB entry specially. */
   2269 		pa = pmap_kextract(va);
   2270 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
   2271 				      (u_long)va, (unsigned long long)pa));
   2272 		if (pap != NULL)
   2273 			*pap = pa;
   2274 		return TRUE;
   2275 	} else if (pm == pmap_kernel() && va >= ktext && va < ektext) {
   2276 		/* Need to deal w/locked TLB entry specially. */
   2277 		pa = pmap_kextract(va);
   2278 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
   2279 		    (u_long)va, (unsigned long long)pa));
   2280 		if (pap != NULL)
   2281 			*pap = pa;
   2282 		return TRUE;
   2283 	} else if (pm == pmap_kernel() && va >= INTSTACK && va < (INTSTACK + 64*KB)) {
   2284 		pa = (paddr_t)(curcpu()->ci_paddr - INTSTACK + va);
   2285 		DPRINTF(PDB_EXTRACT, ("pmap_extract (intstack): va=%lx pa=%llx\n",
   2286 		    (u_long)va, (unsigned long long)pa));
   2287 		if (pap != NULL)
   2288 			*pap = pa;
   2289 		return TRUE;
   2290 	} else {
   2291 		data = pseg_get(pm, va);
   2292 		pa = data & TLB_PA_MASK;
   2293 		if (pmapdebug & PDB_EXTRACT) {
   2294 			paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)],
   2295 					   ASI_PHYS_CACHED);
   2296 			printf("pmap_extract: va=%p segs[%ld]=%llx",
   2297 			       (void *)(u_long)va, (long)va_to_seg(va),
   2298 			       (unsigned long long)npa);
   2299 			if (npa) {
   2300 				npa = (paddr_t)
   2301 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
   2302 					     [va_to_dir(va)],
   2303 					     ASI_PHYS_CACHED);
   2304 				printf(" segs[%ld][%ld]=%lx",
   2305 				       (long)va_to_seg(va),
   2306 				       (long)va_to_dir(va), (long)npa);
   2307 			}
   2308 			if (npa)	{
   2309 				npa = (paddr_t)
   2310 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
   2311 					     [va_to_pte(va)],
   2312 					     ASI_PHYS_CACHED);
   2313 				printf(" segs[%ld][%ld][%ld]=%lx",
   2314 				       (long)va_to_seg(va),
   2315 				       (long)va_to_dir(va),
   2316 				       (long)va_to_pte(va), (long)npa);
   2317 			}
   2318 			printf(" pseg_get: %lx\n", (long)pa);
   2319 		}
   2320 	}
   2321 	if ((data & TLB_V) == 0)
   2322 		return (FALSE);
   2323 	if (pap != NULL)
   2324 		*pap = pa + (va & PGOFSET);
   2325 	return (TRUE);
   2326 }
   2327 
   2328 /*
   2329  * Change protection on a kernel address.
   2330  * This should only be called from MD code.
   2331  */
   2332 void
   2333 pmap_kprotect(vaddr_t va, vm_prot_t prot)
   2334 {
   2335 	struct pmap *pm = pmap_kernel();
   2336 	int64_t data;
   2337 	int rv;
   2338 
   2339 	data = pseg_get(pm, va);
   2340 	KASSERT(data & TLB_V);
   2341 	if (prot & VM_PROT_WRITE) {
   2342 		data |= (TLB_W|TLB_REAL_W);
   2343 	} else {
   2344 		data &= ~(TLB_W|TLB_REAL_W);
   2345 	}
   2346 	rv = pseg_set(pm, va, data, 0);
   2347 	if (rv & 1)
   2348 		panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv);
   2349 	KASSERT(pmap_ctx(pm)>=0);
   2350 	tsb_invalidate(va, pm);
   2351 	tlb_flush_pte(va, pm);
   2352 }
   2353 
   2354 /*
 * Return the number of bytes that pmap_dumpmmu() will dump.
   2356  */
   2357 int
   2358 pmap_dumpsize(void)
   2359 {
   2360 	int	sz;
   2361 
   2362 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
   2363 	sz += kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
   2364 	sz += phys_installed_size * sizeof(phys_ram_seg_t);
   2365 
   2366 	return btodb(sz + DEV_BSIZE - 1);
   2367 }
   2368 
   2369 /*
   2370  * Write the mmu contents to the dump device.
   2371  * This gets appended to the end of a crash dump since
   2372  * there is no in-core copy of kernel memory mappings on a 4/4c machine.
   2373  *
   2374  * Write the core dump headers and MD data to the dump device.
   2375  * We dump the following items:
   2376  *
 *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
 *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
 *	cpu_kcore_4mbseg[kernel_dtlb_slots]  locked kernel 4MB TLB entries
 *	phys_ram_seg_t[phys_installed_size]  physical memory segments
   2380  */
   2381 int
   2382 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, void *, size_t), daddr_t blkno)
   2383 {
   2384 	kcore_seg_t	*kseg;
   2385 	cpu_kcore_hdr_t	*kcpu;
   2386 	phys_ram_seg_t	memseg;
   2387 	struct cpu_kcore_4mbseg ktlb;
   2388 	int	error = 0;
   2389 	int	i;
   2390 	int	buffer[dbtob(1) / sizeof(int)];
   2391 	int	*bp, *ep;
   2392 
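/*
 * EXPEDITE() copies the `n' bytes at `p' (n is assumed to be a multiple
 * of 4) into the one-disk-block staging buffer a word at a time,
 * flushing the buffer to the dump device and advancing blkno whenever
 * it fills.
 */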
   2393 #define EXPEDITE(p,n) do {						\
   2394 	int *sp = (void *)(p);						\
   2395 	int sz = (n);							\
   2396 	while (sz > 0) {						\
   2397 		*bp++ = *sp++;						\
   2398 		if (bp >= ep) {						\
   2399 			error = (*dump)(dumpdev, blkno,			\
   2400 					(void *)buffer, dbtob(1));	\
   2401 			if (error != 0)					\
   2402 				return (error);				\
   2403 			++blkno;					\
   2404 			bp = buffer;					\
   2405 		}							\
   2406 		sz -= 4;						\
   2407 	}								\
   2408 } while (0)
   2409 
   2410 	/* Setup bookkeeping pointers */
   2411 	bp = buffer;
   2412 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
   2413 
   2414 	/* Fill in MI segment header */
   2415 	kseg = (kcore_seg_t *)bp;
   2416 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
   2417 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
   2418 
   2419 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
   2420 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
   2421 	kcpu->cputype = cputyp;
   2422 	kcpu->kernbase = (uint64_t)KERNBASE;
   2423 	kcpu->cpubase = (uint64_t)CPUINFO_VA;
   2424 
   2425 	/* Describe the locked text segment */
   2426 	kcpu->ktextbase = (uint64_t)ktext;
   2427 	kcpu->ktextp = (uint64_t)ktextp;
   2428 	kcpu->ktextsz = (uint64_t)ektext - ktext;
   2429 	if (kcpu->ktextsz > 4*MEG)
   2430 		kcpu->ktextsz = 0;	/* old version can not work */
   2431 
   2432 	/* Describe locked data segment */
   2433 	kcpu->kdatabase = (uint64_t)kdata;
   2434 	kcpu->kdatap = (uint64_t)kdatap;
   2435 	kcpu->kdatasz = (uint64_t)ekdatap - kdatap;
   2436 
   2437 	/* new version of locked segments description */
   2438 	kcpu->newmagic = SPARC64_KCORE_NEWMAGIC;
   2439 	kcpu->num4mbsegs = kernel_dtlb_slots;
   2440 	kcpu->off4mbsegs = ALIGN(sizeof(cpu_kcore_hdr_t));
   2441 
   2442 	/* description of per-cpu mappings */
   2443 	kcpu->numcpuinfos = sparc_ncpus;
   2444 	kcpu->percpusz = 64 * 1024;	/* used to be 128k for some time */
   2445 	kcpu->thiscpu = cpu_number();	/* which cpu is doing this dump */
   2446 	kcpu->cpusp = cpu0paddr - 64 * 1024 * sparc_ncpus;
   2447 
   2448 	/* Now the memsegs */
   2449 	kcpu->nmemseg = phys_installed_size;
   2450 	kcpu->memsegoffset = kcpu->off4mbsegs
   2451 		+ kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
   2452 
   2453 	/* Now we need to point this at our kernel pmap. */
   2454 	kcpu->nsegmap = STSZ;
   2455 	kcpu->segmapoffset = (uint64_t)pmap_kernel()->pm_physaddr;
   2456 
   2457 	/* Note: we have assumed everything fits in buffer[] so far... */
   2458 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
   2459 
   2460 	/* write locked kernel 4MB TLBs */
   2461 	for (i = 0; i < kernel_dtlb_slots; i++) {
   2462 		ktlb.va = kernel_tlbs[i].te_va;
   2463 		ktlb.pa = kernel_tlbs[i].te_pa;
   2464 		EXPEDITE(&ktlb, sizeof(ktlb));
   2465 	}
   2466 
   2467 	/* write memsegs */
   2468 	for (i = 0; i < phys_installed_size; i++) {
   2469 		memseg.start = phys_installed[i].start;
   2470 		memseg.size = phys_installed[i].size;
   2471 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
   2472 	}
   2473 
   2474 	if (bp != buffer)
   2475 		error = (*dump)(dumpdev, blkno++, (void *)buffer, dbtob(1));
   2476 
   2477 	return (error);
   2478 }
   2479 
   2480 /*
   2481  * Determine (non)existence of physical page
   2482  */
   2483 int
   2484 pmap_pa_exists(paddr_t pa)
   2485 {
   2486 	int i;
   2487 
   2488 	/* Just go through physical memory list & see if we're there */
   2489 	for (i = 0; i < phys_installed_size; i++) {
   2490 		if ((phys_installed[i].start <= pa) &&
   2491 				(phys_installed[i].start +
   2492 				 phys_installed[i].size >= pa))
   2493 			return 1;
   2494 	}
   2495 	return 0;
   2496 }
   2497 
   2498 /*
   2499  * Lookup the appropriate TSB entry.
   2500  *
   2501  * Here is the full official pseudo code:
   2502  *
   2503  */
   2504 
   2505 #ifdef NOTYET
   2506 int64 GenerateTSBPointer(
   2507  	int64 va,		/* Missing VA			*/
   2508  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
   2509  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
   2510  	Boolean split,		/* TSB Register[12]		*/
 	int TSBsize)		/* TSB Register[2:0]		*/
   2512 {
   2513  	int64 vaPortion;
   2514  	int64 TSBBaseMask;
   2515  	int64 splitMask;
   2516 
   2517 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
   2518 	TSBBaseMask = 0xffffffffffffe000 <<
   2519 		(split? (TSBsize + 1) : TSBsize);
   2520 
   2521 	/* Shift va towards lsb appropriately and		*/
   2522 	/* zero out the original va page offset			*/
   2523 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
   2524 		0xfffffffffffffff0;
   2525 
   2526 	if (split) {
   2527 		/* There's only one bit in question for split	*/
   2528 		splitMask = 1 << (13 + TSBsize);
   2529 		if (type == 8K_POINTER)
   2530 			/* Make sure we're in the lower half	*/
   2531 			vaPortion &= ~splitMask;
   2532 		else
   2533 			/* Make sure we're in the upper half	*/
   2534 			vaPortion |= splitMask;
   2535 	}
   2536 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
   2537 }
   2538 #endif
   2539 /*
   2540  * Of course, since we are not using a split TSB or variable page sizes,
   2541  * we can optimize this a bit.
   2542  *
   2543  * The following only works for a unified 8K TSB.  It will find the slot
   2544  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
   2545  */
   2546 int
   2547 ptelookup_va(vaddr_t va)
   2548 {
   2549 	long tsbptr;
   2550 #define TSBBASEMASK	(0xffffffffffffe000LL << tsbsize)
   2551 
   2552 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK);
   2553 	return (tsbptr / sizeof(pte_t));
   2554 }
   2555 
   2556 /*
   2557  * Do whatever is needed to sync the MOD/REF flags
   2558  */
   2559 
   2560 bool
   2561 pmap_clear_modify(struct vm_page *pg)
   2562 {
   2563 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2564 	pv_entry_t pv;
   2565 	int rv;
   2566 	int changed = 0;
   2567 #ifdef DEBUG
   2568 	int modified = 0;
   2569 
   2570 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify(%p)\n", pg));
   2571 
   2572 	modified = pmap_is_modified(pg);
   2573 #endif
   2574 	mutex_enter(&pmap_lock);
   2575 	/* Clear all mappings */
   2576 	pv = &md->mdpg_pvh;
   2577 #ifdef DEBUG
   2578 	if (pv->pv_va & PV_MOD)
   2579 		pv->pv_va |= PV_WE;	/* Remember this was modified */
   2580 #endif
   2581 	if (pv->pv_va & PV_MOD) {
   2582 		changed |= 1;
   2583 		pv->pv_va &= ~PV_MOD;
   2584 	}
   2585 #ifdef DEBUG
   2586 	if (pv->pv_next && !pv->pv_pmap) {
   2587 		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
   2588 		Debugger();
   2589 	}
   2590 #endif
   2591 	if (pv->pv_pmap != NULL) {
   2592 		for (; pv; pv = pv->pv_next) {
   2593 			int64_t data;
   2594 			struct pmap *pmap = pv->pv_pmap;
   2595 			vaddr_t va = pv->pv_va & PV_VAMASK;
   2596 
   2597 			/* First clear the mod bit in the PTE and make it R/O */
   2598 			data = pseg_get(pmap, va);
   2599 			KASSERT(data & TLB_V);
   2600 			/* Need to both clear the modify and write bits */
   2601 			if (data & TLB_MODIFY)
   2602 				changed |= 1;
   2603 #ifdef HWREF
   2604 			data &= ~(TLB_MODIFY|TLB_W);
   2605 #else
   2606 			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
   2607 #endif
   2608 			rv = pseg_set(pmap, va, data, 0);
   2609 			if (rv & 1)
   2610 				printf("pmap_clear_modify: pseg_set needs"
   2611 				    " spare! rv=%d\n", rv);
   2612 			if (pmap_is_on_mmu(pmap)) {
   2613 				KASSERT(pmap_ctx(pmap)>=0);
   2614 				tsb_invalidate(va, pmap);
   2615 				tlb_flush_pte(va, pmap);
   2616 			}
   2617 			/* Then clear the mod bit in the pv */
   2618 			if (pv->pv_va & PV_MOD) {
   2619 				changed |= 1;
   2620 				pv->pv_va &= ~PV_MOD;
   2621 			}
   2622 		}
   2623 	}
   2624 	pv_check();
   2625 	mutex_exit(&pmap_lock);
   2626 #ifdef DEBUG
   2627 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify: pg %p %s\n", pg,
   2628 	    (changed ? "was modified" : "was not modified")));
   2629 	if (modified && modified != changed) {
   2630 		printf("pmap_clear_modify: modified %d changed %d\n",
   2631 		       modified, changed);
   2632 		Debugger();
   2633 	}
   2634 #endif
   2635 	return (changed);
   2636 }
   2637 
   2638 bool
   2639 pmap_clear_reference(struct vm_page *pg)
   2640 {
   2641 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2642 	pv_entry_t pv;
   2643 	int rv;
   2644 	int changed = 0;
   2645 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
   2646 	int referenced = 0;
   2647 #endif
   2648 
   2649 	mutex_enter(&pmap_lock);
   2650 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
   2651 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg));
   2652 	referenced = pmap_is_referenced_locked(pg);
   2653 #endif
   2654 	/* Clear all references */
   2655 	pv = &md->mdpg_pvh;
   2656 	if (pv->pv_va & PV_REF) {
   2657 		changed |= 1;
   2658 		pv->pv_va &= ~PV_REF;
   2659 	}
   2660 #ifdef DEBUG
   2661 	if (pv->pv_next && !pv->pv_pmap) {
   2662 		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
   2663 		Debugger();
   2664 	}
   2665 #endif
   2666 	if (pv->pv_pmap != NULL) {
   2667 		for (; pv; pv = pv->pv_next) {
   2668 			int64_t data;
   2669 			struct pmap *pmap = pv->pv_pmap;
   2670 			vaddr_t va = pv->pv_va & PV_VAMASK;
   2671 
   2672 			data = pseg_get(pmap, va);
   2673 			KASSERT(data & TLB_V);
   2674 			DPRINTF(PDB_CHANGEPROT,
   2675 			    ("clearing ref pm:%p va:%p ctx:%lx data:%llx\n",
   2676 			     pmap, (void *)(u_long)va,
   2677 			     (u_long)pmap_ctx(pmap),
   2678 			     (long long)data));
   2679 #ifdef HWREF
   2680 			if (data & TLB_ACCESS) {
   2681 				changed |= 1;
   2682 				data &= ~TLB_ACCESS;
   2683 			}
   2684 #else
   2685 			if (data < 0)
   2686 				changed |= 1;
   2687 			data = 0;
   2688 #endif
   2689 			rv = pseg_set(pmap, va, data, 0);
   2690 			if (rv & 1)
   2691 				panic("pmap_clear_reference: pseg_set needs"
   2692 				    " spare! rv=%d\n", rv);
   2693 			if (pmap_is_on_mmu(pmap)) {
   2694 				KASSERT(pmap_ctx(pmap)>=0);
   2695 				tsb_invalidate(va, pmap);
   2696 				tlb_flush_pte(va, pmap);
   2697 			}
   2698 			if (pv->pv_va & PV_REF) {
   2699 				changed |= 1;
   2700 				pv->pv_va &= ~PV_REF;
   2701 			}
   2702 		}
   2703 	}
   2704 	dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
   2705 	pv_check();
   2706 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
   2707 	if (pmap_is_referenced_locked(pg)) {
   2708 		pv = &md->mdpg_pvh;
   2709 		printf("pmap_clear_reference(): %p still referenced "
   2710 			"(pmap = %p, ctx = %d)\n", pg, pv->pv_pmap,
   2711 			pv->pv_pmap ? pmap_ctx(pv->pv_pmap) : 0);
   2712 		Debugger();
   2713 	}
   2714 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
   2715 	    ("pmap_clear_reference: pg %p %s\n", pg,
   2716 	     (changed ? "was referenced" : "was not referenced")));
   2717 	if (referenced != changed) {
   2718 		printf("pmap_clear_reference: referenced %d changed %d\n",
   2719 		       referenced, changed);
   2720 		Debugger();
   2721 	} else {
   2722 		mutex_exit(&pmap_lock);
   2723 		return (referenced);
   2724 	}
   2725 #endif
   2726 	mutex_exit(&pmap_lock);
   2727 	return (changed);
   2728 }
   2729 
   2730 bool
   2731 pmap_is_modified(struct vm_page *pg)
   2732 {
   2733 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2734 	pv_entry_t pv, npv;
   2735 	bool res = false;
   2736 
   2737 	/* Check if any mapping has been modified */
   2738 	pv = &md->mdpg_pvh;
   2739 	if (pv->pv_va & PV_MOD)
   2740 		res = true;
   2741 #ifdef HWREF
   2742 #ifdef DEBUG
   2743 	if (pv->pv_next && !pv->pv_pmap) {
   2744 		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
   2745 		Debugger();
   2746 	}
   2747 #endif
   2748 	if (!res && pv->pv_pmap != NULL) {
   2749 		mutex_enter(&pmap_lock);
   2750 		for (npv = pv; !res && npv && npv->pv_pmap;
   2751 		     npv = npv->pv_next) {
   2752 			int64_t data;
   2753 
   2754 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
   2755 			KASSERT(data & TLB_V);
   2756 			if (data & TLB_MODIFY)
   2757 				res = true;
   2758 
   2759 			/* Migrate modify info to head pv */
   2760 			if (npv->pv_va & PV_MOD) {
   2761 				res = true;
   2762 				npv->pv_va &= ~PV_MOD;
   2763 			}
   2764 		}
   2765 		/* Save modify info */
   2766 		if (res)
   2767 			pv->pv_va |= PV_MOD;
   2768 #ifdef DEBUG
   2769 		if (res)
   2770 			pv->pv_va |= PV_WE;
   2771 #endif
   2772 		mutex_exit(&pmap_lock);
   2773 	}
   2774 #endif
   2775 
   2776 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_is_modified(%p) = %d\n", pg,
   2777 	    res));
   2778 	pv_check();
   2779 	return res;
   2780 }
   2781 
   2782 /*
 * Variant of pmap_is_referenced() where caller already holds pmap_lock
   2784  */
   2785 static bool
   2786 pmap_is_referenced_locked(struct vm_page *pg)
   2787 {
   2788 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2789 	pv_entry_t pv, npv;
   2790 	bool res = false;
   2791 
   2792 	KASSERT(mutex_owned(&pmap_lock));
   2793 
   2794 	/* Check if any mapping has been referenced */
   2795 	pv = &md->mdpg_pvh;
   2796 	if (pv->pv_va & PV_REF)
   2797 		return true;
   2798 
   2799 #ifdef HWREF
   2800 #ifdef DEBUG
   2801 	if (pv->pv_next && !pv->pv_pmap) {
   2802 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
   2803 		Debugger();
   2804 	}
   2805 #endif
   2806 	if (pv->pv_pmap == NULL)
   2807 		return false;
   2808 
   2809 	for (npv = pv; npv; npv = npv->pv_next) {
   2810 		int64_t data;
   2811 
   2812 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
   2813 		KASSERT(data & TLB_V);
   2814 		if (data & TLB_ACCESS)
   2815 			res = true;
   2816 
   2817 		/* Migrate ref info to head pv */
   2818 		if (npv->pv_va & PV_REF) {
   2819 			res = true;
   2820 			npv->pv_va &= ~PV_REF;
   2821 		}
   2822 	}
   2823 	/* Save ref info */
   2824 	if (res)
   2825 		pv->pv_va |= PV_REF;
   2826 #endif
   2827 
   2828 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
   2829 		("pmap_is_referenced(%p) = %d\n", pg, res));
   2830 	pv_check();
   2831 	return res;
   2832 }
   2833 
   2834 bool
   2835 pmap_is_referenced(struct vm_page *pg)
   2836 {
   2837 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2838 	pv_entry_t pv;
   2839 	bool res = false;
   2840 
   2841 	/* Check if any mapping has been referenced */
   2842 	pv = &md->mdpg_pvh;
   2843 	if (pv->pv_va & PV_REF)
   2844 		return true;
   2845 
   2846 #ifdef HWREF
   2847 #ifdef DEBUG
   2848 	if (pv->pv_next && !pv->pv_pmap) {
   2849 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
   2850 		Debugger();
   2851 	}
   2852 #endif
   2853 	if (pv->pv_pmap != NULL) {
   2854 		mutex_enter(&pmap_lock);
   2855 		res = pmap_is_referenced_locked(pg);
   2856 		mutex_exit(&pmap_lock);
   2857 	}
   2858 #endif
   2859 
   2860 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
   2861 		("pmap_is_referenced(%p) = %d\n", pg, res));
   2862 	pv_check();
   2863 	return res;
   2864 }
   2865 
   2866 
   2867 
   2868 /*
   2869  *	Routine:	pmap_unwire
   2870  *	Function:	Clear the wired attribute for a map/virtual-address
   2871  *			pair.
   2872  *	In/out conditions:
   2873  *			The mapping must already exist in the pmap.
   2874  */
   2875 void
   2876 pmap_unwire(pmap_t pmap, vaddr_t va)
   2877 {
   2878 	int64_t data;
   2879 	int rv;
   2880 
   2881 	DPRINTF(PDB_MMU_STEAL, ("pmap_unwire(%p, %lx)\n", pmap, va));
   2882 
   2883 #ifdef DEBUG
   2884 	/*
   2885 	 * Is this part of the permanent 4MB mapping?
   2886 	 */
   2887 	if (pmap == pmap_kernel() && va >= ktext &&
   2888 		va < roundup(ekdata, 4*MEG)) {
   2889 		prom_printf("pmap_unwire: va=%08x in locked TLB\n", va);
   2890 		prom_abort();
   2891 		return;
   2892 	}
   2893 #endif
   2894 	data = pseg_get(pmap, va & PV_VAMASK);
   2895 	KASSERT(data & TLB_V);
   2896 	data &= ~TLB_TSB_LOCK;
   2897 	rv = pseg_set(pmap, va & PV_VAMASK, data, 0);
   2898 	if (rv & 1)
   2899 		panic("pmap_unwire: pseg_set needs spare! rv=%d\n", rv);
   2900 	pv_check();
   2901 }
   2902 
   2903 /*
   2904  * Lower the protection on the specified physical page.
   2905  *
   2906  * Never enable writing as it will break COW
   2907  */
   2908 
   2909 void
   2910 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
   2911 {
   2912 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   2913 	int64_t clear, set;
   2914 	int64_t data = 0;
   2915 	int rv;
   2916 	pv_entry_t pv, npv, freepv = NULL;
   2917 	struct pmap *pmap;
   2918 	vaddr_t va;
   2919 	bool needflush = FALSE;
   2920 
   2921 	DPRINTF(PDB_CHANGEPROT,
   2922 	    ("pmap_page_protect: pg %p prot %x\n", pg, prot));
   2923 
   2924 	mutex_enter(&pmap_lock);
   2925 	pv = &md->mdpg_pvh;
   2926 	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
   2927 		/* copy_on_write */
   2928 
   2929 		set = TLB_V;
   2930 		clear = TLB_REAL_W|TLB_W;
   2931 		if (VM_PROT_EXECUTE & prot)
   2932 			set |= TLB_EXEC;
   2933 		else
   2934 			clear |= TLB_EXEC;
   2935 		if (VM_PROT_EXECUTE == prot)
   2936 			set |= TLB_EXEC_ONLY;
   2937 
   2938 #ifdef DEBUG
   2939 		if (pv->pv_next && !pv->pv_pmap) {
   2940 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
   2941 			Debugger();
   2942 		}
   2943 #endif
   2944 		if (pv->pv_pmap != NULL) {
   2945 			for (; pv; pv = pv->pv_next) {
   2946 				pmap = pv->pv_pmap;
   2947 				va = pv->pv_va & PV_VAMASK;
   2948 
   2949 				DPRINTF(PDB_CHANGEPROT | PDB_REF,
   2950 					("pmap_page_protect: "
   2951 					 "RO va %p of pg %p...\n",
   2952 					 (void *)(u_long)pv->pv_va, pg));
   2953 				data = pseg_get(pmap, va);
   2954 				KASSERT(data & TLB_V);
   2955 
   2956 				/* Save REF/MOD info */
   2957 				if (data & TLB_ACCESS)
   2958 					pv->pv_va |= PV_REF;
   2959 				if (data & TLB_MODIFY)
   2960 					pv->pv_va |= PV_MOD;
   2961 
   2962 				data &= ~clear;
   2963 				data |= set;
   2964 				rv = pseg_set(pmap, va, data, 0);
   2965 				if (rv & 1)
   2966 					panic("pmap_page_protect: "
   2967 					       "pseg_set needs spare! rv=%d\n",
   2968 					       rv);
   2969 				if (pmap_is_on_mmu(pmap)) {
   2970 					KASSERT(pmap_ctx(pmap)>=0);
   2971 					tsb_invalidate(va, pmap);
   2972 					tlb_flush_pte(va, pmap);
   2973 				}
   2974 			}
   2975 		}
   2976 	} else {
   2977 		/* remove mappings */
   2978 		DPRINTF(PDB_REMOVE,
   2979 			("pmap_page_protect: demapping pg %p\n", pg));
   2980 
   2981 		/* First remove the entire list of continuation pv's */
   2982 		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
   2983 			pmap = npv->pv_pmap;
   2984 			va = npv->pv_va & PV_VAMASK;
   2985 
   2986 			/* We're removing npv from pv->pv_next */
   2987 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
   2988 				("pmap_page_protect: "
   2989 				 "demap va %p of pg %p in pmap %p...\n",
   2990 				 (void *)(u_long)va, pg, pmap));
   2991 
   2992 			/* clear the entry in the page table */
   2993 			data = pseg_get(pmap, va);
   2994 			KASSERT(data & TLB_V);
   2995 
   2996 			/* Save ref/mod info */
   2997 			if (data & TLB_ACCESS)
   2998 				pv->pv_va |= PV_REF;
   2999 			if (data & TLB_MODIFY)
   3000 				pv->pv_va |= PV_MOD;
   3001 			/* Clear mapping */
   3002 			rv = pseg_set(pmap, va, 0, 0);
   3003 			if (rv & 1)
   3004 				panic("pmap_page_protect: pseg_set needs"
   3005 				     " spare! rv=%d\n", rv);
   3006 			if (pmap_is_on_mmu(pmap)) {
   3007 				KASSERT(pmap_ctx(pmap)>=0);
   3008 				tsb_invalidate(va, pmap);
   3009 				tlb_flush_pte(va, pmap);
   3010 			}
   3011 			if (pmap->pm_refs > 0) {
   3012 				needflush = TRUE;
   3013 			}
   3014 
   3015 			/* free the pv */
   3016 			pv->pv_next = npv->pv_next;
   3017 			npv->pv_next = freepv;
   3018 			freepv = npv;
   3019 		}
   3020 
   3021 		/* Then remove the primary pv */
   3022 #ifdef DEBUG
   3023 		if (pv->pv_next && !pv->pv_pmap) {
   3024 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
   3025 			Debugger();
   3026 		}
   3027 #endif
   3028 		if (pv->pv_pmap != NULL) {
   3029 			pmap = pv->pv_pmap;
   3030 			va = pv->pv_va & PV_VAMASK;
   3031 
   3032 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
   3033 				("pmap_page_protect: "
   3034 				 "demap va %p of pg %p from pm %p...\n",
   3035 				 (void *)(u_long)va, pg, pmap));
   3036 
   3037 			data = pseg_get(pmap, va);
   3038 			KASSERT(data & TLB_V);
   3039 			/* Save ref/mod info */
   3040 			if (data & TLB_ACCESS)
   3041 				pv->pv_va |= PV_REF;
   3042 			if (data & TLB_MODIFY)
   3043 				pv->pv_va |= PV_MOD;
   3044 			rv = pseg_set(pmap, va, 0, 0);
   3045 			if (rv & 1)
   3046 				panic("pmap_page_protect: pseg_set needs"
   3047 				    " spare! rv=%d\n", rv);
   3048 			if (pmap_is_on_mmu(pmap)) {
   3049 			    	KASSERT(pmap_ctx(pmap)>=0);
   3050 				tsb_invalidate(va, pmap);
   3051 				tlb_flush_pte(va, pmap);
   3052 			}
   3053 			if (pmap->pm_refs > 0) {
   3054 				needflush = TRUE;
   3055 			}
   3056 			npv = pv->pv_next;
   3057 			/* dump the first pv */
   3058 			if (npv) {
   3059 				/* First save mod/ref bits */
   3060 				pv->pv_pmap = npv->pv_pmap;
   3061 				pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
   3062 				pv->pv_next = npv->pv_next;
   3063 				npv->pv_next = freepv;
   3064 				freepv = npv;
   3065 			} else {
   3066 				pv->pv_pmap = NULL;
   3067 				pv->pv_next = NULL;
   3068 			}
   3069 		}
   3070 		if (needflush)
   3071 			dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
   3072 	}
   3073 	/* We should really only flush the pages we demapped. */
   3074 	pv_check();
   3075 	mutex_exit(&pmap_lock);
   3076 
   3077 	/* Catch up on deferred frees. */
   3078 	for (; freepv != NULL; freepv = npv) {
   3079 		npv = freepv->pv_next;
   3080 		pool_cache_put(&pmap_pv_cache, freepv);
   3081 	}
   3082 }
   3083 
   3084 #ifdef PMAP_COUNT_DEBUG
   3085 /*
   3086  * count pages in pmap -- this can be slow.
   3087  */
   3088 int
   3089 pmap_count_res(struct pmap *pm)
   3090 {
   3091 	int64_t data;
   3092 	paddr_t *pdir, *ptbl;
   3093 	int i, j, k, n;
   3094 
   3095 	/* Don't want one of these pages reused while we're reading it. */
   3096 	mutex_enter(&pmap_lock);
   3097 	n = 0;
   3098 	for (i = 0; i < STSZ; i++) {
   3099 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
   3100 					       ASI_PHYS_CACHED);
   3101 		if (pdir == NULL) {
   3102 			continue;
   3103 		}
   3104 		for (k = 0; k < PDSZ; k++) {
   3105 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
   3106 						       ASI_PHYS_CACHED);
   3107 			if (ptbl == NULL) {
   3108 				continue;
   3109 			}
   3110 			for (j = 0; j < PTSZ; j++) {
   3111 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
   3112 						     ASI_PHYS_CACHED);
   3113 				if (data & TLB_V)
   3114 					n++;
   3115 			}
   3116 		}
   3117 	}
   3118 	mutex_exit(&pmap_lock);
   3119 
   3120 	if (pm->pm_stats.resident_count != n)
   3121 		printf("pmap_count_resident: pm_stats = %ld, counted: %d\n",
   3122 		    pm->pm_stats.resident_count, n);
   3123 
   3124 	return n;
   3125 }
   3126 
   3127 /*
   3128  * count wired pages in pmap -- this can be slow.
   3129  */
   3130 int
   3131 pmap_count_wired(struct pmap *pm)
   3132 {
   3133 	int64_t data;
   3134 	paddr_t *pdir, *ptbl;
   3135 	int i, j, k, n;
   3136 
   3137 	/* Don't want one of these pages reused while we're reading it. */
   3138 	mutex_enter(&pmap_lock);	/* XXX uvmplock */
   3139 	n = 0;
   3140 	for (i = 0; i < STSZ; i++) {
   3141 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
   3142 					       ASI_PHYS_CACHED);
   3143 		if (pdir == NULL) {
   3144 			continue;
   3145 		}
   3146 		for (k = 0; k < PDSZ; k++) {
   3147 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
   3148 						       ASI_PHYS_CACHED);
   3149 			if (ptbl == NULL) {
   3150 				continue;
   3151 			}
   3152 			for (j = 0; j < PTSZ; j++) {
   3153 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
   3154 						     ASI_PHYS_CACHED);
   3155 				if (data & TLB_TSB_LOCK)
   3156 					n++;
   3157 			}
   3158 		}
   3159 	}
   3160 	mutex_exit(&pmap_lock);	/* XXX uvmplock */
   3161 
   3162 	if (pm->pm_stats.wired_count != n)
   3163 		printf("pmap_count_wired: pm_stats = %ld, counted: %d\n",
   3164 		    pm->pm_stats.wired_count, n);
   3165 
   3166 	return n;
   3167 }
   3168 #endif	/* PMAP_COUNT_DEBUG */
   3169 
   3170 void
   3171 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
   3172 {
   3173 
   3174 	blast_icache();
   3175 }
   3176 
   3177 /*
   3178  * Allocate a hardware context to the given pmap.
   3179  */
   3180 static int
   3181 ctx_alloc(struct pmap *pm)
   3182 {
   3183 	int i, ctx;
   3184 
   3185 	KASSERT(pm != pmap_kernel());
   3186 	KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
   3187 	mutex_enter(&curcpu()->ci_ctx_lock);
   3188 	ctx = curcpu()->ci_pmap_next_ctx++;
   3189 
   3190 	/*
   3191 	 * if we have run out of contexts, remove all user entries from
   3192 	 * the TSB, TLB and dcache and start over with context 1 again.
   3193 	 */
   3194 
   3195 	if (ctx == curcpu()->ci_numctx) {
   3196 		DPRINTF(PDB_CTX_ALLOC|PDB_CTX_FLUSHALL,
   3197 			("ctx_alloc: cpu%d run out of contexts %d\n",
   3198 			 cpu_number(), curcpu()->ci_numctx));
   3199 		write_user_windows();
   3200 		while (!LIST_EMPTY(&curcpu()->ci_pmap_ctxlist)) {
   3201 #ifdef MULTIPROCESSOR
   3202 			KASSERT(pmap_ctx(LIST_FIRST(&curcpu()->ci_pmap_ctxlist)) != 0);
   3203 #endif
   3204 			ctx_free(LIST_FIRST(&curcpu()->ci_pmap_ctxlist),
   3205 				 curcpu());
   3206 		}
   3207 		for (i = TSBENTS - 1; i >= 0; i--) {
   3208 			if (TSB_TAG_CTX(curcpu()->ci_tsb_dmmu[i].tag) != 0) {
   3209 				clrx(&curcpu()->ci_tsb_dmmu[i].data);
   3210 			}
   3211 			if (TSB_TAG_CTX(curcpu()->ci_tsb_immu[i].tag) != 0) {
   3212 				clrx(&curcpu()->ci_tsb_immu[i].data);
   3213 			}
   3214 		}
   3215 		sp_tlb_flush_all();
   3216 		ctx = 1;
   3217 		curcpu()->ci_pmap_next_ctx = 2;
   3218 	}
   3219 	curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
   3220 	LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
   3221 	pmap_ctx(pm) = ctx;
   3222 	mutex_exit(&curcpu()->ci_ctx_lock);
   3223 	DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n",
   3224 		cpu_number(), ctx));
   3225 	return ctx;
   3226 }
   3227 
   3228 /*
   3229  * Give away a context.
   3230  */
   3231 static void
   3232 ctx_free(struct pmap *pm, struct cpu_info *ci)
   3233 {
   3234 	int oldctx;
   3235 	int cpunum;
   3236 
   3237 	KASSERT(mutex_owned(&ci->ci_ctx_lock));
   3238 
   3239 #ifdef MULTIPROCESSOR
   3240 	cpunum = ci->ci_index;
   3241 #else
   3242 	/* Give the compiler a hint.. */
   3243 	cpunum = 0;
   3244 #endif
   3245 
   3246 	oldctx = pm->pm_ctx[cpunum];
   3247 	if (oldctx == 0)
   3248 		return;
   3249 
   3250 #ifdef DIAGNOSTIC
   3251 	if (pm == pmap_kernel())
   3252 		panic("ctx_free: freeing kernel context");
   3253 	if (ci->ci_ctxbusy[oldctx] == 0)
   3254 		printf("ctx_free: freeing free context %d\n", oldctx);
   3255 	if (ci->ci_ctxbusy[oldctx] != pm->pm_physaddr) {
   3256 		printf("ctx_free: freeing someone else's context\n "
   3257 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
   3258 		       oldctx, (void *)(u_long)ci->ci_ctxbusy[oldctx], pm,
   3259 		       (void *)(u_long)pm->pm_physaddr);
   3260 		Debugger();
   3261 	}
   3262 #endif
   3263 	/* We should verify it has not been stolen and reallocated... */
   3264 	DPRINTF(PDB_CTX_ALLOC, ("ctx_free: cpu%d freeing ctx %d\n",
   3265 		cpu_number(), oldctx));
   3266 	ci->ci_ctxbusy[oldctx] = 0UL;
   3267 	pm->pm_ctx[cpunum] = 0;
   3268 	LIST_REMOVE(pm, pm_list[cpunum]);
   3269 }
   3270 
   3271 /*
   3272  * Enter the pmap and virtual address into the
   3273  * physical to virtual map table.
   3274  *
   3275  * We enter here with the pmap locked.
   3276  * The pv_entry_t in *npvp is replaced with NULL if this function
   3277  * uses it, otherwise the caller needs to free it.
   3278  */
   3279 
   3280 void
   3281 pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg,
   3282 	      pv_entry_t *npvp)
   3283 {
   3284 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   3285 	pv_entry_t pvh, npv;
   3286 
   3287 	KASSERT(mutex_owned(&pmap_lock));
   3288 
   3289 	pvh = &md->mdpg_pvh;
   3290 	DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n",
   3291 	    pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next));
   3292 	if (pvh->pv_pmap == NULL) {
   3293 
   3294 		/*
   3295 		 * No entries yet, use header as the first entry
   3296 		 */
   3297 		DPRINTF(PDB_ENTER, ("pmap_enter: first pv: pmap %p va %lx\n",
   3298 		    pmap, va));
   3299 		ENTER_STAT(firstpv);
   3300 		PV_SETVA(pvh, va);
   3301 		pvh->pv_pmap = pmap;
   3302 		pvh->pv_next = NULL;
   3303 	} else {
   3304 		if (pg->loan_count == 0 && !(pvh->pv_va & PV_ALIAS)) {
   3305 
   3306 			/*
   3307 			 * There is at least one other VA mapping this page.
   3308 			 * Check if they are cache index compatible. If not
   3309 			 * remove all mappings, flush the cache and set page
   3310 			 * to be mapped uncached. Caching will be restored
   3311 			 * when pages are mapped compatible again.
   3312 			 */
   3313 			if ((pvh->pv_va ^ va) & VA_ALIAS_MASK) {
   3314 				pvh->pv_va |= PV_ALIAS;
   3315 				pmap_page_cache(pmap, pa, 0);
   3316 				ENTER_STAT(ci);
   3317 			}
   3318 		}
   3319 
   3320 		/*
   3321 		 * There is at least one other VA mapping this page.
   3322 		 * Place this entry after the header.
   3323 		 */
   3324 
   3325 		DPRINTF(PDB_ENTER, ("pmap_enter: new pv: pmap %p va %lx\n",
   3326 		    pmap, va));
   3327 		npv = *npvp;
   3328 		*npvp = NULL;
   3329 		npv->pv_pmap = pmap;
   3330 		npv->pv_va = va & PV_VAMASK;
   3331 		npv->pv_next = pvh->pv_next;
   3332 		pvh->pv_next = npv;
   3333 
   3334 		if (!npv->pv_next) {
   3335 			ENTER_STAT(secondpv);
   3336 		}
   3337 	}
   3338 }
   3339 
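/*
 * Aside on the alias check in pmap_enter_pv() above (numbers are
 * illustrative, not taken from this file): with a virtually indexed
 * D$ whose index spans more bits than the 8K page offset, two VAs
 * mapping the same physical page can select different cache lines if
 * they differ in those extra index bits; that is what VA_ALIAS_MASK
 * tests.  E.g. if the index also covered va<13>, va 0x2000 and va
 * 0x4000 mapping the same page would alias, so the page is demoted to
 * uncached until the conflicting mapping goes away.
 */
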
   3340 /*
   3341  * Remove a physical to virtual address translation.
   3342  */
   3343 
   3344 pv_entry_t
   3345 pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg)
   3346 {
   3347 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
   3348 	pv_entry_t pvh, npv, pv;
   3349 	int64_t data = 0;
   3350 
   3351 	KASSERT(mutex_owned(&pmap_lock));
   3352 
   3353 	pvh = &md->mdpg_pvh;
   3354 
   3355 	DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap,
   3356 	    (void *)(u_long)va, pg));
   3357 	pv_check();
   3358 
   3359 	/*
   3360 	 * Remove page from the PV table.
   3361 	 * If it is the first entry on the list, it is actually
   3362 	 * in the header and we must copy the following entry up
   3363 	 * to the header.  Otherwise we must search the list for
   3364 	 * the entry.  In either case we free the now unused entry.
   3365 	 */
   3366 	if (pmap == pvh->pv_pmap && PV_MATCH(pvh, va)) {
   3367 		data = pseg_get(pvh->pv_pmap, pvh->pv_va & PV_VAMASK);
   3368 		KASSERT(data & TLB_V);
   3369 		npv = pvh->pv_next;
   3370 		if (npv) {
   3371 			/* First save mod/ref bits */
   3372 			pvh->pv_va = (pvh->pv_va & PV_MASK) | npv->pv_va;
   3373 			pvh->pv_next = npv->pv_next;
   3374 			pvh->pv_pmap = npv->pv_pmap;
   3375 		} else {
   3376 			pvh->pv_pmap = NULL;
   3377 			pvh->pv_next = NULL;
   3378 			pvh->pv_va &= (PV_REF|PV_MOD);
   3379 		}
   3380 		REMOVE_STAT(pvfirst);
   3381 	} else {
   3382 		for (pv = pvh, npv = pvh->pv_next; npv;
   3383 		     pv = npv, npv = npv->pv_next) {
   3384 			REMOVE_STAT(pvsearch);
   3385 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
   3386 				break;
   3387 		}
   3388 		pv->pv_next = npv->pv_next;
   3389 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
   3390 		KASSERT(data & TLB_V);
   3391 	}
   3392 
   3393 	/* Save ref/mod info */
   3394 	if (data & TLB_ACCESS)
   3395 		pvh->pv_va |= PV_REF;
   3396 	if (data & TLB_MODIFY)
   3397 		pvh->pv_va |= PV_MOD;
   3398 
   3399 	/* Check to see if the alias went away */
   3400 	if (pvh->pv_va & PV_ALIAS) {
   3401 		pvh->pv_va &= ~PV_ALIAS;
   3402 		for (pv = pvh; pv; pv = pv->pv_next) {
   3403 			if ((pv->pv_va ^ pvh->pv_va) & VA_ALIAS_MASK) {
   3404 				pvh->pv_va |= PV_ALIAS;
   3405 				break;
   3406 			}
   3407 		}
   3408 		if (!(pvh->pv_va & PV_ALIAS))
   3409 			pmap_page_cache(pmap, VM_PAGE_TO_PHYS(pg), 1);
   3410 	}
   3411 	pv_check();
   3412 	return npv;
   3413 }
   3414 
   3415 /*
   3416  *	pmap_page_cache:
   3417  *
   3418  *	Change all mappings of a page to cached/uncached.
   3419  */
   3420 void
   3421 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
   3422 {
   3423 	struct vm_page *pg;
   3424 	struct vm_page_md *md;
   3425 	pv_entry_t pv;
   3426 	vaddr_t va;
   3427 	int rv;
   3428 
   3429 #if 0
   3430 	/*
   3431 	 * Why is this?
   3432 	 */
   3433 	if (CPU_ISSUN4US || CPU_ISSUN4V)
   3434 		return;
   3435 #endif
   3436 
   3437 	KASSERT(mutex_owned(&pmap_lock));
   3438 
    3439 	DPRINTF(PDB_ENTER, ("pmap_page_cache(%llx)\n",
   3440 	    (unsigned long long)pa));
   3441 	pg = PHYS_TO_VM_PAGE(pa);
   3442 	md = VM_PAGE_TO_MD(pg);
   3443 	pv = &md->mdpg_pvh;
   3444 	while (pv) {
   3445 		va = pv->pv_va & PV_VAMASK;
   3446 		if (pv->pv_va & PV_NC) {
   3447 			int64_t data;
   3448 
   3449 			/* Non-cached -- I/O mapping */
   3450 			data = pseg_get(pv->pv_pmap, va);
   3451 			KASSERT(data & TLB_V);
   3452 			rv = pseg_set(pv->pv_pmap, va,
   3453 				     data & ~(TLB_CV|TLB_CP), 0);
   3454 			if (rv & 1)
   3455 				panic("pmap_page_cache: pseg_set needs"
   3456 				     " spare! rv=%d\n", rv);
   3457 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
   3458 			int64_t data;
   3459 
   3460 			/* Enable caching */
   3461 			data = pseg_get(pv->pv_pmap, va);
   3462 			KASSERT(data & TLB_V);
   3463 			rv = pseg_set(pv->pv_pmap, va, data | TLB_CV, 0);
   3464 			if (rv & 1)
   3465 				panic("pmap_page_cache: pseg_set needs"
   3466 				    " spare! rv=%d\n", rv);
   3467 		} else {
   3468 			int64_t data;
   3469 
   3470 			/* Disable caching */
   3471 			data = pseg_get(pv->pv_pmap, va);
   3472 			KASSERT(data & TLB_V);
   3473 			rv = pseg_set(pv->pv_pmap, va, data & ~TLB_CV, 0);
   3474 			if (rv & 1)
   3475 				panic("pmap_page_cache: pseg_set needs"
   3476 				    " spare! rv=%d\n", rv);
   3477 		}
   3478 		if (pmap_is_on_mmu(pv->pv_pmap)) {
   3479 			/* Force reload -- cache bits have changed */
    3480 			KASSERT(pmap_ctx(pv->pv_pmap) >= 0);
   3481 			tsb_invalidate(va, pv->pv_pmap);
   3482 			tlb_flush_pte(va, pv->pv_pmap);
   3483 		}
   3484 		pv = pv->pv_next;
   3485 	}
   3486 }
   3487 
   3488 /*
   3489  * Some routines to allocate and free PTPs.
   3490  */
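         /*
          * pmap_get_page: allocate one zeroed physical page, via uvm_pagealloc()
          * once UVM is initialized or uvm_page_physget() during early bootstrap.
          * Returns 1 on success, 0 if no page is available.
          */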
   3491 static int
   3492 pmap_get_page(paddr_t *p)
   3493 {
   3494 	struct vm_page *pg;
   3495 	paddr_t pa;
   3496 
   3497 	if (uvm.page_init_done) {
   3498 		pg = uvm_pagealloc(NULL, 0, NULL,
   3499 		    UVM_PGA_ZERO | UVM_PGA_USERESERVE);
   3500 		if (pg == NULL)
   3501 			return (0);
   3502 		pa = VM_PAGE_TO_PHYS(pg);
   3503 	} else {
   3504 		if (!uvm_page_physget(&pa))
   3505 			return (0);
   3506 		pmap_zero_page(pa);
   3507 	}
   3508 	*p = pa;
   3509 	return (1);
   3510 }
   3511 
   3512 static void
   3513 pmap_free_page(paddr_t pa, sparc64_cpuset_t cs)
   3514 {
   3515 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
   3516 
   3517 	dcache_flush_page_cpuset(pa, cs);
   3518 	uvm_pagefree(pg);
   3519 }
   3520 
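         /*
          * As pmap_free_page(), but without flushing the page from the D$ first.
          */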
   3521 static void
   3522 pmap_free_page_noflush(paddr_t pa)
   3523 {
   3524 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
   3525 
   3526 	uvm_pagefree(pg);
   3527 }
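
         /*
          * A hypothetical usage sketch (pmap_testout() below is one real caller):
          *
          *	paddr_t pa;
          *
          *	if (!pmap_get_page(&pa))
          *		panic("out of pages");
          *	...map and use the page...
          *	pmap_free_page(pa, cpus_active);
          */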
   3528 
   3529 #ifdef DDB
   3530 
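         /*
          * DDB helper: dump the PV list for the managed page at physical
          * address "addr".
          */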
   3531 void db_dump_pv(db_expr_t, int, db_expr_t, const char *);
   3532 void
   3533 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, const char *modif)
   3534 {
   3535 	struct vm_page *pg;
   3536 	struct vm_page_md *md;
   3537 	struct pv_entry *pv;
   3538 
   3539 	if (!have_addr) {
   3540 		db_printf("Need addr for pv\n");
   3541 		return;
   3542 	}
   3543 
   3544 	pg = PHYS_TO_VM_PAGE((paddr_t)addr);
   3545 	if (pg == NULL) {
   3546 		db_printf("page is not managed\n");
   3547 		return;
   3548 	}
   3549 	md = VM_PAGE_TO_MD(pg);
   3550 	for (pv = &md->mdpg_pvh; pv; pv = pv->pv_next)
   3551 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
   3552 			  pv, pv->pv_next, pv->pv_pmap,
   3553 			  (unsigned long long)pv->pv_va);
   3554 }
   3555 
   3556 #endif
   3557 
   3558 #ifdef DEBUG
   3559 /*
    3560  * Test ref/modify handling.
          */
   3561 void pmap_testout(void);
   3562 void
   3563 pmap_testout(void)
   3564 {
   3565 	vaddr_t va;
   3566 	volatile int *loc;
   3567 	int val = 0;
   3568 	paddr_t pa;
   3569 	struct vm_page *pg;
   3570 	int ref, mod;
   3571 
   3572 	/* Allocate a page */
   3573 	va = (vaddr_t)(vmmap - PAGE_SIZE);
   3574 	KASSERT(va != 0);
   3575 	loc = (int*)va;
   3576 
    3577 	if (!pmap_get_page(&pa))
         		panic("pmap_testout: out of memory");
   3578 	pg = PHYS_TO_VM_PAGE(pa);
   3579 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
   3580 	pmap_update(pmap_kernel());
   3581 
   3582 	/* Now clear reference and modify */
   3583 	ref = pmap_clear_reference(pg);
   3584 	mod = pmap_clear_modify(pg);
   3585 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3586 	       (void *)(u_long)va, (long)pa,
   3587 	       ref, mod);
   3588 
   3589 	/* Check it's properly cleared */
   3590 	ref = pmap_is_referenced(pg);
   3591 	mod = pmap_is_modified(pg);
   3592 	printf("Checking cleared page: ref %d, mod %d\n",
   3593 	       ref, mod);
   3594 
   3595 	/* Reference page */
   3596 	val = *loc;
   3597 
   3598 	ref = pmap_is_referenced(pg);
   3599 	mod = pmap_is_modified(pg);
   3600 	printf("Referenced page: ref %d, mod %d val %x\n",
   3601 	       ref, mod, val);
   3602 
   3603 	/* Now clear reference and modify */
   3604 	ref = pmap_clear_reference(pg);
   3605 	mod = pmap_clear_modify(pg);
   3606 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3607 	       (void *)(u_long)va, (long)pa,
   3608 	       ref, mod);
   3609 
   3610 	/* Modify page */
   3611 	*loc = 1;
   3612 
   3613 	ref = pmap_is_referenced(pg);
   3614 	mod = pmap_is_modified(pg);
   3615 	printf("Modified page: ref %d, mod %d\n",
   3616 	       ref, mod);
   3617 
   3618 	/* Now clear reference and modify */
   3619 	ref = pmap_clear_reference(pg);
   3620 	mod = pmap_clear_modify(pg);
   3621 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3622 	       (void *)(u_long)va, (long)pa,
   3623 	       ref, mod);
   3624 
   3625 	/* Check it's properly cleared */
   3626 	ref = pmap_is_referenced(pg);
   3627 	mod = pmap_is_modified(pg);
   3628 	printf("Checking cleared page: ref %d, mod %d\n",
   3629 	       ref, mod);
   3630 
   3631 	/* Modify page */
   3632 	*loc = 1;
   3633 
   3634 	ref = pmap_is_referenced(pg);
   3635 	mod = pmap_is_modified(pg);
   3636 	printf("Modified page: ref %d, mod %d\n",
   3637 	       ref, mod);
   3638 
   3639 	/* Check pmap_protect() */
   3640 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
   3641 	pmap_update(pmap_kernel());
   3642 	ref = pmap_is_referenced(pg);
   3643 	mod = pmap_is_modified(pg);
   3644 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
   3645 	       ref, mod);
   3646 
   3647 	/* Now clear reference and modify */
   3648 	ref = pmap_clear_reference(pg);
   3649 	mod = pmap_clear_modify(pg);
   3650 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3651 	       (void *)(u_long)va, (long)pa,
   3652 	       ref, mod);
   3653 
   3654 	/* Modify page */
   3655 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
   3656 	pmap_update(pmap_kernel());
   3657 	*loc = 1;
   3658 
   3659 	ref = pmap_is_referenced(pg);
   3660 	mod = pmap_is_modified(pg);
   3661 	printf("Modified page: ref %d, mod %d\n",
   3662 	       ref, mod);
   3663 
   3664 	/* Check pmap_protect() */
   3665 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
   3666 	pmap_update(pmap_kernel());
   3667 	ref = pmap_is_referenced(pg);
   3668 	mod = pmap_is_modified(pg);
    3669 	printf("pmap_protect(VM_PROT_NONE): ref %d, mod %d\n",
   3670 	       ref, mod);
   3671 
   3672 	/* Now clear reference and modify */
   3673 	ref = pmap_clear_reference(pg);
   3674 	mod = pmap_clear_modify(pg);
   3675 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3676 	       (void *)(u_long)va, (long)pa,
   3677 	       ref, mod);
   3678 
   3679 	/* Modify page */
   3680 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
   3681 	pmap_update(pmap_kernel());
   3682 	*loc = 1;
   3683 
   3684 	ref = pmap_is_referenced(pg);
   3685 	mod = pmap_is_modified(pg);
   3686 	printf("Modified page: ref %d, mod %d\n",
   3687 	       ref, mod);
   3688 
    3689 	/* Check pmap_page_protect() */
   3690 	pmap_page_protect(pg, VM_PROT_READ);
   3691 	ref = pmap_is_referenced(pg);
   3692 	mod = pmap_is_modified(pg);
    3693 	printf("pmap_page_protect(VM_PROT_READ): ref %d, mod %d\n",
   3694 	       ref, mod);
   3695 
   3696 	/* Now clear reference and modify */
   3697 	ref = pmap_clear_reference(pg);
   3698 	mod = pmap_clear_modify(pg);
   3699 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3700 	       (void *)(u_long)va, (long)pa,
   3701 	       ref, mod);
    3702 
   3704 	/* Modify page */
   3705 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
   3706 	pmap_update(pmap_kernel());
   3707 	*loc = 1;
   3708 
   3709 	ref = pmap_is_referenced(pg);
   3710 	mod = pmap_is_modified(pg);
   3711 	printf("Modified page: ref %d, mod %d\n",
   3712 	       ref, mod);
   3713 
    3714 	/* Check pmap_page_protect() */
   3715 	pmap_page_protect(pg, VM_PROT_NONE);
   3716 	ref = pmap_is_referenced(pg);
   3717 	mod = pmap_is_modified(pg);
    3718 	printf("pmap_page_protect(VM_PROT_NONE): ref %d, mod %d\n",
   3719 	       ref, mod);
   3720 
   3721 	/* Now clear reference and modify */
   3722 	ref = pmap_clear_reference(pg);
   3723 	mod = pmap_clear_modify(pg);
   3724 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3725 	       (void *)(u_long)va, (long)pa,
   3726 	       ref, mod);
   3727 
   3728 	/* Unmap page */
   3729 	pmap_remove(pmap_kernel(), va, va+1);
   3730 	pmap_update(pmap_kernel());
   3731 	ref = pmap_is_referenced(pg);
   3732 	mod = pmap_is_modified(pg);
   3733 	printf("Unmapped page: ref %d, mod %d\n", ref, mod);
   3734 
   3735 	/* Now clear reference and modify */
   3736 	ref = pmap_clear_reference(pg);
   3737 	mod = pmap_clear_modify(pg);
   3738 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
   3739 	       (void *)(u_long)va, (long)pa, ref, mod);
   3740 
   3741 	/* Check it's properly cleared */
   3742 	ref = pmap_is_referenced(pg);
   3743 	mod = pmap_is_modified(pg);
   3744 	printf("Checking cleared page: ref %d, mod %d\n",
   3745 	       ref, mod);
   3746 
   3747 	pmap_remove(pmap_kernel(), va, va+1);
   3748 	pmap_update(pmap_kernel());
   3749 	pmap_free_page(pa, cpus_active);
   3750 }
   3751 #endif
   3752 
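         /*
          * pmap_update: pm_refs is used here as a flag; if it was cleared
          * (the pmap was taken off the MMU wholesale), set it again and
          * re-activate the pmap, otherwise nothing is pending.
          */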
   3753 void
   3754 pmap_update(struct pmap *pmap)
   3755 {
   3756 
   3757 	if (pmap->pm_refs > 0) {
   3758 		return;
   3759 	}
   3760 	pmap->pm_refs = 1;
   3761 	pmap_activate_pmap(pmap);
   3762 }
   3763 
   3764 /*
   3765  * pmap_copy_page()/pmap_zero_page()
   3766  *
    3767  * We make sure that the destination page is flushed from all D$'s
   3768  * before we perform the copy/zero.
   3769  */
   3770 extern int cold;
   3771 void
   3772 pmap_copy_page(paddr_t src, paddr_t dst)
   3773 {
   3774 
   3775 	if (!cold)
   3776 		dcache_flush_page_all(dst);
   3777 	pmap_copy_page_phys(src, dst);
   3778 }
   3779 
   3780 void
   3781 pmap_zero_page(paddr_t pa)
   3782 {
   3783 
   3784 	if (!cold)
   3785 		dcache_flush_page_all(pa);
   3786 	pmap_zero_page_phys(pa);
   3787 }
   3788 
   3789 #ifdef _LP64
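         /*
          * Check that [addr, eaddr) does not overlap the sparc64 VA hole;
          * returns 0 when the range lies entirely below or entirely above
          * the hole, and EINVAL otherwise.
          */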
   3790 int
   3791 sparc64_mmap_range_test(vaddr_t addr, vaddr_t eaddr)
   3792 {
   3793 	const vaddr_t hole_start = 0x000007ffffffffff;
   3794 	const vaddr_t hole_end   = 0xfffff80000000000;
   3795 
   3796 	if (addr >= hole_end)
   3797 		return 0;
   3798 	if (eaddr <= hole_start)
   3799 		return 0;
   3800 
   3801 	return EINVAL;
   3802 }
   3803 #endif
   3804 
   3805 #ifdef SUN4V
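         /*
          * Lock a permanent 64KB DTLB mapping for the interrupt stack through
          * the sun4v hypervisor; the mapping is entered with the D$ disabled
          * for it (FORCE_ALIAS).
          */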
   3806 void
   3807 pmap_setup_intstack_sun4v(paddr_t pa)
   3808 {
   3809 	int64_t hv_rc;
   3810 	int64_t data;
   3811 	data = SUN4V_TSB_DATA(
   3812 	    0 /* global */,
   3813 	    PGSZ_64K,
   3814 	    pa,
   3815 	    1 /* priv */,
   3816 	    1 /* Write */,
   3817 	    1 /* Cacheable */,
   3818 	    FORCE_ALIAS /* ALIAS -- Disable D$ */,
   3819 	    1 /* valid */,
   3820 	    0 /* IE */,
   3821 	    0 /* wc */);
   3822 	hv_rc = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
    3823 	if (hv_rc != H_EOK) {
   3824 		panic("hv_mmu_map_perm_addr() failed - rc = %" PRId64 "\n",
   3825 		    hv_rc);
   3826 	}
   3827 }
   3828 
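         /*
          * Hand the TSB descriptor to the sun4v hypervisor for both context 0
          * (kernel) and non-zero (user) contexts.
          */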
   3829 void
   3830 pmap_setup_tsb_sun4v(struct tsb_desc* tsb_desc)
   3831 {
   3832 	int err;
   3833 	paddr_t tsb_desc_p;
   3834 	tsb_desc_p = pmap_kextract((vaddr_t)tsb_desc);
   3835 	if (!tsb_desc_p) {
   3836 		panic("pmap_setup_tsb_sun4v() pmap_kextract() failed");
   3837 	}
   3838 	err = hv_mmu_tsb_ctx0(1, tsb_desc_p);
   3839 	if (err != H_EOK) {
   3840 		prom_printf("hv_mmu_tsb_ctx0() err: %d\n", err);
   3841 		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctx0() failed");
   3842 	}
   3843 	err = hv_mmu_tsb_ctxnon0(1, tsb_desc_p);
   3844 	if (err != H_EOK) {
   3845 		prom_printf("hv_mmu_tsb_ctxnon0() err: %d\n", err);
   3846 		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctxnon0() failed");
   3847 	}
   3848 }
   3849 
   3850 #endif
   3851