/*	$NetBSD: xen_pmap.c,v 1.9 2011/11/20 19:41:27 jym Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.9 2011/11/20 19:41:27 jym Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#if !defined(__x86_64__)
#include "opt_kstack_dr0.h"
#endif /* !defined(__x86_64__) */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#ifdef XEN
#include <xen/xen3-public/xen.h>
#include <xen/hypervisor.h>
#endif

#define COUNT(x)	/* nothing */

static pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER;
extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */

void
pmap_apte_flush(struct pmap *pmap)
{

	KASSERT(kpreempt_disabled());

	/*
	 * Flush the APTE mapping from all other CPUs that
	 * are using the pmap we are using (whose APTE space
	 * is the one we've just modified).
	 *
	 * XXXthorpej -- find a way to defer the IPI.
	 */
	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_APTE);
	pmap_tlb_shootnow();
}

/*
 * Unmap the content of the APDP PDEs.
 */
void
pmap_unmap_apdp(void)
{
	int i;

	for (i = 0; i < PDP_SIZE; i++) {
		pmap_pte_set(APDP_PDE+i, 0);
#if defined (PAE)
		/*
		 * For PAE, there are two places where alternative recursive
		 * mappings could be found with Xen:
		 * - in the L2 shadow pages
		 * - in the "real" L2 kernel page (pmap_kl2pd), which is
		 *   unique and static.
		 * We first clear the APDP for the current pmap. As the L2
		 * kernel page is unique, we only need to clear it once for
		 * all pmaps.
		 */
		pmap_pte_set(APDP_PDE_SHADOW+i, 0);
#endif
	}
}

/*
 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
 *
 * => we lock enough pmaps to keep things locked in
 * => must be undone with pmap_unmap_ptes before returning
 */

void
pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
	      pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
{
	pd_entry_t opde, npde;
	struct pmap *ourpmap;
	struct cpu_info *ci;
	struct lwp *l;
	bool iscurrent;
	uint64_t ncsw;
	int s;

	/* the kernel's pmap is always accessible */
	if (pmap == pmap_kernel()) {
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		return;
	}
	KASSERT(kpreempt_disabled());

 retry:
	l = curlwp;
	ncsw = l->l_ncsw;
	ourpmap = NULL;
	ci = curcpu();
#if defined(__x86_64__)
	/*
	 * curpmap can only be pmap_kernel(), so at this point
	 * pmap_is_curpmap() is always false.
	 */
	iscurrent = 0;
	ourpmap = pmap_kernel();
#else /* __x86_64__*/
	if (ci->ci_want_pmapload &&
	    vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
		pmap_load();
		if (l->l_ncsw != ncsw)
			goto retry;
	}
	iscurrent = pmap_is_curpmap(pmap);
	/* if curpmap then we are always mapped */
	if (iscurrent) {
		mutex_enter(pmap->pm_lock);
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		goto out;
	}
	ourpmap = ci->ci_pmap;
#endif /* __x86_64__ */

	/* need to lock both curpmap and pmap: use ordered locking */
	pmap_reference(ourpmap);
	if ((uintptr_t) pmap < (uintptr_t) ourpmap) {
		mutex_enter(pmap->pm_lock);
		mutex_enter(ourpmap->pm_lock);
	} else {
		mutex_enter(ourpmap->pm_lock);
		mutex_enter(pmap->pm_lock);
	}

	if (l->l_ncsw != ncsw)
		goto unlock_and_retry;

	/* need to load a new alternate pt space into curpmap? */
	COUNT(apdp_pde_map);
	opde = *APDP_PDE;
	if (!pmap_valid_entry(opde) ||
	    pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) {
		int i;
		s = splvm();
		/* Make recursive entry usable in user PGD */
		for (i = 0; i < PDP_SIZE; i++) {
			npde = pmap_pa2pte(
			    pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V;
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)),
			    npde);
			xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
			    npde);
#ifdef PAE
			/* update shadow entry too */
			xpq_queue_pte_update(
			    xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde);
#endif /* PAE */
			xpq_queue_invlpg(
			    (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]);
		}
		if (pmap_valid_entry(opde))
			pmap_apte_flush(ourpmap);
		splx(s);
	}
	*pmap2 = ourpmap;
	*ptepp = APTE_BASE;
	*pdeppp = alternate_pdes;
	KASSERT(l->l_ncsw == ncsw);
#if !defined(__x86_64__)
 out:
#endif
	/*
	 * might have blocked, need to retry?
	 */
	if (l->l_ncsw != ncsw) {
 unlock_and_retry:
		if (ourpmap != NULL) {
			mutex_exit(ourpmap->pm_lock);
			pmap_destroy(ourpmap);
		}
		mutex_exit(pmap->pm_lock);
		goto retry;
	}
}
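
/*
 * Typical use of pmap_map_ptes() with pmap_unmap_ptes() below (a sketch,
 * mirroring pmap_extract_ma() later in this file): the caller must keep
 * kernel preemption disabled across the pair and hand the pmap2 cookie
 * back unchanged.
 *
 *	struct pmap *pmap2;
 *	pt_entry_t *ptes;
 *	pd_entry_t * const *pdes;
 *
 *	kpreempt_disable();
 *	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
 *	... inspect or modify ptes[pl1_i(va)] ...
 *	pmap_unmap_ptes(pmap, pmap2);
 *	kpreempt_enable();
 */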

/*
 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
 */

void
pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
{

	if (pmap == pmap_kernel()) {
		return;
	}
	KASSERT(kpreempt_disabled());
	if (pmap2 == NULL) {
		mutex_exit(pmap->pm_lock);
	} else {
#if defined(__x86_64__)
		KASSERT(pmap2 == pmap_kernel());
#else
		KASSERT(curcpu()->ci_pmap == pmap2);
#endif
#if defined(MULTIPROCESSOR)
		pmap_unmap_apdp();
		pmap_pte_flush();
		pmap_apte_flush(pmap2);
#endif /* MULTIPROCESSOR */
		COUNT(apdp_pde_unmap);
		mutex_exit(pmap->pm_lock);
		mutex_exit(pmap2->pm_lock);
		pmap_destroy(pmap2);
	}
}

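/*
 * pmap_enter: enter a mapping given a (pseudo-)physical address
 *
 * On Xen the page tables hold machine addresses, so the PA is translated
 * with xpmap_ptom() before being handed to pmap_enter_ma(). PAs outside
 * this domain's pseudo-physical range (e.g. device memory) are passed
 * through untranslated; see the "XXX hack" below.
 */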
int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
	     PG_V | PG_k;
	if (flags & PMAP_NOCACHE)
		npte |= PG_N;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PG_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}
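
/*
 * Example use (a sketch, not taken from this file): mapping a
 * hypervisor-provided machine frame for which no struct vm_page exists,
 * e.g. a shared ring page at machine address "ma":
 *
 *	vaddr_t va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
 *	pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	pmap_update(pmap_kernel());
 */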

/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde)) {
		pmap_unmap_ptes(pmap, pmap2);
		kpreempt_enable();
		return false;
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}
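
/*
 * Note: unlike pmap_extract(), which returns a pseudo-physical address,
 * this returns the machine address found in the PTE itself, suitable for
 * handing to the hypervisor or to another domain.
 */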

/*
 * Flush all APDP entries found in pmaps.
 * Required during Xen save/restore operations, as Xen does not
 * handle alternative recursive mappings properly.
 */
void
pmap_xen_suspend(void)
{
	int i;
	int s;
	struct pmap *pm;

	s = splvm();

	pmap_unmap_apdp();

	mutex_enter(&pmaps_lock);
	/*
	 * Set APDP entries to 0 in all pmaps.
	 * Note that for PAE kernels, this only clears the APDP entries
	 * found in the L2 shadow pages, as pmap_pdirpa() is used to obtain
	 * the PA of the pmap->pm_pdir[] pages (forming the 4 contiguous
	 * pages of the PAE PD: 3 for user space, 1 for the L2 kernel
	 * shadow page).
	 */
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_APTE + i)),
			    0);
		}
	}
	mutex_exit(&pmaps_lock);

	xpq_flush_queue();

	splx(s);

#ifdef PAE
	pmap_unmap_recursive_entries();
#endif
}

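/*
 * Re-establish, at resume time, the recursive mappings that
 * pmap_xen_suspend() cleared (PAE only; see the PAE section below).
 */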
void
pmap_xen_resume(void)
{
#ifdef PAE
	pmap_map_recursive_entries();
#endif
}

#ifdef PAE
/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom((pm)->pm_pdirpa[i]) | PG_V);
		}
	}
	mutex_exit(&pmaps_lock);

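	/*
	 * pmap_kernel() is not on the pmaps list, so its recursive
	 * entries are set up separately.
	 */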
	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PG_V);
	}

	xpq_flush_queue();
}

void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_pdp_cache, as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_pdp_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++)
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);

	xpq_flush_queue();
}
#endif /* PAE */