/*	$NetBSD: xen_pmap.c,v 1.13 2012/01/09 12:58:49 cherry Exp $	*/

/*
 * Copyright (c) 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2001 (c) Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xen_pmap.c,v 1.13 2012/01/09 12:58:49 cherry Exp $");

#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_xen.h"
#if !defined(__x86_64__)
#include "opt_kstack_dr0.h"
#endif /* !defined(__x86_64__) */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>

#include <dev/isa/isareg.h>

#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <machine/isa_machdep.h>
#include <machine/cpuvar.h>

#include <x86/pmap.h>
#include <x86/pmap_pv.h>

#include <x86/i82489reg.h>
#include <x86/i82489var.h>

#include <xen/xen-public/xen.h>
#include <xen/hypervisor.h>
#include <xen/xenpmap.h>

#define COUNT(x)	/* nothing */

static pd_entry_t * const alternate_pdes[] = APDES_INITIALIZER;
extern pd_entry_t * const normal_pdes[];

extern paddr_t pmap_pa_start; /* PA of first physical page for this domain */
extern paddr_t pmap_pa_end;   /* PA of last physical page for this domain */

void
pmap_apte_flush(struct pmap *pmap)
{

	KASSERT(kpreempt_disabled());

	/*
	 * Flush the APTE mapping from all other CPUs that
	 * are using the pmap we are using (whose APTE space
	 * is the one we've just modified).
	 *
	 * XXXthorpej -- find a way to defer the IPI.
	 */
	pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_APTE);
	pmap_tlb_shootnow();
}

/*
 * Unmap the content of the APDP PDEs.
 */
void
pmap_unmap_apdp(void)
{
	int i;

	for (i = 0; i < PDP_SIZE; i++) {
		pmap_pte_set(APDP_PDE+i, 0);
#if defined (PAE)
		/*
		 * For PAE, there are two places where alternative recursive
		 * mappings could be found with Xen:
		 * - in the L2 shadow pages
		 * - in the "real" L2 kernel page (pmap_kl2pd), which is
		 *   unique and static.
		 * We first clear the APDP for the current pmap. As the L2
		 * kernel page is unique, we only need to do it once for
		 * all pmaps.
		 */
		pmap_pte_set(APDP_PDE_SHADOW+i, 0);
#endif
	}
}

/*
 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
 *
 * => we lock enough pmaps to keep things locked in
 * => must be undone with pmap_unmap_ptes before returning
 */

void
pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
	      pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
{
	pd_entry_t opde, npde;
	struct pmap *ourpmap;
	struct cpu_info *ci;
	struct lwp *l;
	bool iscurrent;
	uint64_t ncsw;
	int s;

	/* the kernel's pmap is always accessible */
	if (pmap == pmap_kernel()) {
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		return;
	}
	KASSERT(kpreempt_disabled());

 retry:
	l = curlwp;
	ncsw = l->l_ncsw;
	ourpmap = NULL;
	ci = curcpu();
#if defined(__x86_64__)
	/*
	 * curpmap can only be pmap_kernel() here, so
	 * pmap_is_curpmap() is always false.
	 */
	iscurrent = 0;
	ourpmap = pmap_kernel();
#else /* __x86_64__ */
	if (ci->ci_want_pmapload &&
	    vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
		pmap_load();
		if (l->l_ncsw != ncsw)
			goto retry;
	}
	iscurrent = pmap_is_curpmap(pmap);
	/* if curpmap then we are always mapped */
	if (iscurrent) {
		mutex_enter(pmap->pm_lock);
		*pmap2 = NULL;
		*ptepp = PTE_BASE;
		*pdeppp = normal_pdes;
		goto out;
	}
	ourpmap = ci->ci_pmap;
#endif /* __x86_64__ */

	/* need to lock both curpmap and pmap: use ordered locking */
	pmap_reference(ourpmap);
	if ((uintptr_t) pmap < (uintptr_t) ourpmap) {
		mutex_enter(pmap->pm_lock);
		mutex_enter(ourpmap->pm_lock);
	} else {
		mutex_enter(ourpmap->pm_lock);
		mutex_enter(pmap->pm_lock);
	}

	if (l->l_ncsw != ncsw)
		goto unlock_and_retry;

	/* need to load a new alternate pt space into curpmap? */
	COUNT(apdp_pde_map);
	opde = *APDP_PDE;
	if (!pmap_valid_entry(opde) ||
	    pmap_pte2pa(opde) != pmap_pdirpa(pmap, 0)) {
		int i;
		s = splvm();
		/* Make recursive entry usable in user PGD */
		for (i = 0; i < PDP_SIZE; i++) {
			npde = pmap_pa2pte(
			    pmap_pdirpa(pmap, i * NPDPG)) | PG_k | PG_V;
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pmap, PDIR_SLOT_PTE + i)),
			    npde);
			xpq_queue_pte_update(xpmap_ptetomach(&APDP_PDE[i]),
			    npde);
#ifdef PAE
			/* update shadow entry too */
			xpq_queue_pte_update(
			    xpmap_ptetomach(&APDP_PDE_SHADOW[i]), npde);
#endif /* PAE */
			xpq_queue_invlpg(
			    (vaddr_t)&pmap->pm_pdir[PDIR_SLOT_PTE + i]);
		}
		if (pmap_valid_entry(opde))
			pmap_apte_flush(ourpmap);
		splx(s);
	}
	*pmap2 = ourpmap;
	*ptepp = APTE_BASE;
	*pdeppp = alternate_pdes;
	KASSERT(l->l_ncsw == ncsw);
#if !defined(__x86_64__)
 out:
#endif
	/*
	 * might have blocked, need to retry?
	 */
	if (l->l_ncsw != ncsw) {
 unlock_and_retry:
		if (ourpmap != NULL) {
			mutex_exit(ourpmap->pm_lock);
			pmap_destroy(ourpmap);
		}
		mutex_exit(pmap->pm_lock);
		goto retry;
	}
}

/*
 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
 */

void
pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
{

	if (pmap == pmap_kernel()) {
		return;
	}
	KASSERT(kpreempt_disabled());
	if (pmap2 == NULL) {
		mutex_exit(pmap->pm_lock);
	} else {
#if defined(__x86_64__)
		KASSERT(pmap2 == pmap_kernel());
#else
		KASSERT(curcpu()->ci_pmap == pmap2);
#endif
#if defined(MULTIPROCESSOR)
		pmap_unmap_apdp();
		pmap_pte_flush();
		pmap_apte_flush(pmap2);
#endif /* MULTIPROCESSOR */
		COUNT(apdp_pde_unmap);
		mutex_exit(pmap->pm_lock);
		mutex_exit(pmap2->pm_lock);
		pmap_destroy(pmap2);
	}
}
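
/*
 * Typical calling discipline for the pair above (illustrative sketch only;
 * pmap_extract_ma() below is a complete in-tree caller).  The mapping is
 * only valid with kernel preemption disabled and must be torn down with
 * pmap_unmap_ptes() before the thread can block or return.
 */
#if 0	/* example only, not compiled */
	pt_entry_t *ptes;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	/* ... inspect or modify pdes[]/ptes[] for the VA of interest ... */
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();
#endif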

int
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{
	paddr_t ma;

	if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
		ma = pa; /* XXX hack */
	} else {
		ma = xpmap_ptom(pa);
	}

	return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
}
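
/*
 * Illustrative sketch (not compiled) of the PA -> MA step used above:
 * pseudo-physical pages owned by this domain are translated through the
 * phys-to-machine table with xpmap_ptom(); addresses outside
 * [pmap_pa_start, pmap_pa_end) are passed through unchanged, on the
 * assumption that they already are machine addresses (the "XXX hack"
 * above).  The helper name is hypothetical.
 */
#if 0	/* example only, not compiled */
static paddr_t
xen_pa_to_ma(paddr_t pa)
{

	if (pa < pmap_pa_start || pmap_pa_end <= pa)
		return pa;		/* not ours: assume machine address */
	return xpmap_ptom(pa);		/* pseudo-physical -> machine */
}
#endif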

/*
 * pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
 *
 * => no need to lock anything, assume va is already allocated
 * => should be faster than normal pmap enter function
 * => we expect a MACHINE address
 */

void
pmap_kenter_ma(vaddr_t va, paddr_t ma, vm_prot_t prot, u_int flags)
{
	pt_entry_t *pte, opte, npte;

	if (va < VM_MIN_KERNEL_ADDRESS)
		pte = vtopte(va);
	else
		pte = kvtopte(va);

	npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
	     PG_V | PG_k;
	if (flags & PMAP_NOCACHE)
		npte |= PG_N;

	if ((cpu_feature[2] & CPUID_NOX) && !(prot & VM_PROT_EXECUTE))
		npte |= PG_NX;

	opte = pmap_pte_testset(pte, npte); /* zap! */

	if (pmap_valid_entry(opte)) {
#if defined(MULTIPROCESSOR)
		kpreempt_disable();
		pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
		kpreempt_enable();
#else
		/* Don't bother deferring in the single CPU case. */
		pmap_update_pg(va);
#endif
	}
}
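
/*
 * Illustrative use of pmap_kenter_ma() (sketch only): map a machine page,
 * e.g. one handed to us by the hypervisor, into freshly reserved kernel VA.
 * "ma" is assumed to hold that machine address; the pmap_update() call is
 * merely a belt-and-braces flush of any deferred pmap work.
 */
#if 0	/* example only, not compiled */
	vaddr_t va;

	va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (va != 0) {
		pmap_kenter_ma(va, ma, VM_PROT_READ | VM_PROT_WRITE, 0);
		pmap_update(pmap_kernel());
	}
#endif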

/*
 * pmap_extract_ma: extract a MA for the given VA
 */

bool
pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde;
	pd_entry_t * const *pdes;
	struct pmap *pmap2;

	kpreempt_disable();
	pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
	if (!pmap_pdes_valid(va, pdes, &pde)) {
		pmap_unmap_ptes(pmap, pmap2);
		kpreempt_enable();
		return false;
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap, pmap2);
	kpreempt_enable();

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & (NBPD_L1 - 1));
		return true;
	}

	return false;
}
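
/*
 * Illustrative use (sketch only): fetch the machine address backing a
 * kernel VA and, when it belongs to this domain, convert it back to a
 * pseudo-physical address with xpmap_mtop().  "va" is assumed given.
 */
#if 0	/* example only, not compiled */
	paddr_t ma, pa;

	if (pmap_extract_ma(pmap_kernel(), va, &ma)) {
		pa = xpmap_mtop(ma);	/* machine -> pseudo-physical */
		/* ... use pa ... */
	}
#endif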

/*
 * Flush all APDP entries found in pmaps.
 * Required during Xen save/restore operations, as Xen does not
 * handle alternative recursive mappings properly.
 */
void
pmap_xen_suspend(void)
{
	int i;
	int s;
	struct pmap *pm;

	s = splvm();

	pmap_unmap_apdp();

	mutex_enter(&pmaps_lock);
	/*
	 * Set APDP entries to 0 in all pmaps.
	 * Note that for PAE kernels, this only clears the APDP entries
	 * found in the L2 shadow pages, as pmap_pdirpa() is used to obtain
	 * the PA of the pmap->pm_pdir[] pages (forming the 4 contiguous
	 * pages of PAE PD: 3 for user space, 1 for the L2 kernel shadow
	 * page).
	 */
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_APTE + i)),
			    0);
		}
	}
	mutex_exit(&pmaps_lock);

	xpq_flush_queue();

	splx(s);

#ifdef PAE
	pmap_unmap_recursive_entries();
#endif
}

void
pmap_xen_resume(void)
{
#ifdef PAE
	pmap_map_recursive_entries();
#endif
}
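
/*
 * Expected ordering around a save/restore cycle (sketch only; the actual
 * calls are made from the Xen suspend/resume machinery, not from this file):
 *
 *	pmap_xen_suspend();	clear the entries Xen cannot checkpoint
 *	  ... domain is suspended, saved and later restored ...
 *	pmap_xen_resume();	re-establish the recursive entries (PAE)
 */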

#ifdef PAE
/*
 * NetBSD uses L2 shadow pages to support PAE with Xen. However, Xen does not
 * handle them correctly during save/restore, leading to incorrect page
 * tracking and pinning during restore.
 * For save/restore to succeed, two functions are introduced:
 * - pmap_map_recursive_entries(), used by resume code to set the recursive
 *   mapping entries to their correct value
 * - pmap_unmap_recursive_entries(), used by suspend code to clear all
 *   PDIR_SLOT_PTE entries
 */
void
pmap_map_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)),
			    xpmap_ptom((pm)->pm_pdirpa[i]) | PG_V);
		}
	}
	mutex_exit(&pmaps_lock);

	for (i = 0; i < PDP_SIZE; i++) {
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    xpmap_ptom(pmap_kernel()->pm_pdirpa[i]) | PG_V);
	}

	xpq_flush_queue();
}

void
pmap_unmap_recursive_entries(void)
{
	int i;
	struct pmap *pm;

	/*
	 * Invalidate pmap_pdp_cache as it contains L2-pinned objects with
	 * recursive entries.
	 * XXX jym@ : find a way to drain per-CPU caches too;
	 * pool_cache_invalidate() does not do that.
	 */
	pool_cache_invalidate(&pmap_pdp_cache);

	mutex_enter(&pmaps_lock);
	LIST_FOREACH(pm, &pmaps, pm_list) {
		for (i = 0; i < PDP_SIZE; i++) {
			xpq_queue_pte_update(
			    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_PTE + i)), 0);
		}
	}
	mutex_exit(&pmaps_lock);

	/* do it for pmap_kernel() too! */
	for (i = 0; i < PDP_SIZE; i++)
		xpq_queue_pte_update(
		    xpmap_ptom(pmap_pdirpa(pmap_kernel(), PDIR_SLOT_PTE + i)),
		    0);

	xpq_flush_queue();
}
#endif /* PAE */

#if defined(PAE) || defined(__x86_64__)

extern struct cpu_info	* (*xpq_cpu)(void);
static __inline void
pmap_kpm_setpte(struct cpu_info *ci, int index)
{
#ifdef PAE
		xpq_queue_pte_update(
			xpmap_ptetomach(&ci->ci_kpm_pdir[l2tol2(index)]),
			pmap_kernel()->pm_pdir[index]);
#elif defined(__x86_64__)
		xpq_queue_pte_update(
			xpmap_ptetomach(&ci->ci_kpm_pdir[index]),
			pmap_kernel()->pm_pdir[index]);
#endif /* PAE */
}

static void
pmap_kpm_sync_xcall(void *arg1, void *arg2)
{
	KASSERT(arg1 != NULL);
	KASSERT(arg2 != NULL);

	struct pmap *pmap = arg1;
	int index = *(int *)arg2;
	KASSERT(pmap == pmap_kernel() || index < PDIR_SLOT_PTE);

	struct cpu_info *ci = xpq_cpu();

	if (pmap == pmap_kernel()) {
		KASSERT(index >= PDIR_SLOT_KERN);
		pmap_kpm_setpte(ci, index);
		pmap_pte_flush();
		return;
	}

#ifdef PAE
	KASSERTMSG(false, "%s not allowed for PAE user pmaps", __func__);
	return;
#else /* __x86_64__ */

	if (ci->ci_pmap != pmap) {
		/* pmap changed. Nothing to do. */
		return;
	}

	pmap_pte_set(&ci->ci_kpm_pdir[index],
	    pmap->pm_pdir[index]);
	pmap_pte_flush();
#endif /* PAE || __x86_64__ */
}

/*
 * Synchronise shadow pdir with the pmap on all cpus on which it is
 * loaded.
 */
void
xen_kpm_sync(struct pmap *pmap, int index)
{
	uint64_t where;

	KASSERT(pmap != NULL);

	pmap_pte_flush();

	if (__predict_false(xpq_cpu != &x86_curcpu)) { /* Too early to xcall */
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci == NULL) {
				continue;
			}
			if (pmap == pmap_kernel() ||
			    ci->ci_cpumask & pmap->pm_cpus) {
				pmap_kpm_setpte(ci, index);
			}
		}
		pmap_pte_flush();
		return;
	}

	if (pmap == pmap_kernel()) {
		where = xc_broadcast(XC_HIGHPRI,
		    pmap_kpm_sync_xcall, pmap, &index);
		xc_wait(where);
	} else {
		KASSERT(mutex_owned(pmap->pm_lock));
		KASSERT(kpreempt_disabled());

		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci == NULL) {
				continue;
			}
			while (ci->ci_cpumask & pmap->pm_cpus) {
#ifdef MULTIPROCESSOR
#define CPU_IS_CURCPU(ci) __predict_false((ci) == curcpu())
#else /* MULTIPROCESSOR */
#define CPU_IS_CURCPU(ci) __predict_true((ci) == curcpu())
#endif /* MULTIPROCESSOR */
#if 0 /* XXX: Race with remote pmap_load() */
				if (ci->ci_want_pmapload &&
				    !CPU_IS_CURCPU(ci)) {
					/*
					 * XXX: make this more cpu
					 *  cycle friendly/co-operate
					 *  with pmap_load()
					 */
					continue;
				}
#endif /* 0 */
				where = xc_unicast(XC_HIGHPRI, pmap_kpm_sync_xcall,
				    pmap, &index, ci);
				xc_wait(where);
				break;
			}
		}
	}
}

#endif /* PAE || __x86_64__ */