      1 /*	$NetBSD: x86_xpmap.c,v 1.12.4.2 2009/05/13 17:18:50 jym Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
      5  *
      6  * Permission to use, copy, modify, and distribute this software for any
      7  * purpose with or without fee is hereby granted, provided that the above
      8  * copyright notice and this permission notice appear in all copies.
      9  *
     10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     17  */
     18 
     19 /*
     20  * Copyright (c) 2006, 2007 Manuel Bouyer.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the above copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *	This product includes software developed by Manuel Bouyer.
     33  * 4. The name of the author may not be used to endorse or promote products
     34  *    derived from this software without specific prior written permission.
     35  *
     36  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     37  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     39  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     40  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     41  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     42  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     43  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     44  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     45  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     46  *
     47  */
     48 
     49 /*
     50  *
     51  * Copyright (c) 2004 Christian Limpach.
     52  * All rights reserved.
     53  *
     54  * Redistribution and use in source and binary forms, with or without
     55  * modification, are permitted provided that the following conditions
     56  * are met:
     57  * 1. Redistributions of source code must retain the above copyright
     58  *    notice, this list of conditions and the following disclaimer.
     59  * 2. Redistributions in binary form must reproduce the above copyright
     60  *    notice, this list of conditions and the following disclaimer in the
     61  *    documentation and/or other materials provided with the distribution.
     62  * 3. All advertising materials mentioning features or use of this software
     63  *    must display the following acknowledgement:
     64  *      This product includes software developed by Christian Limpach.
     65  * 4. The name of the author may not be used to endorse or promote products
     66  *    derived from this software without specific prior written permission.
     67  *
     68  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     69  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     70  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     71  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     72  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     73  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     74  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     75  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     76  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     77  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     78  */
     79 
     80 
     81 #include <sys/cdefs.h>
     82 __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.12.4.2 2009/05/13 17:18:50 jym Exp $");
     83 
     84 #include "opt_xen.h"
     85 #include "opt_ddb.h"
     86 #include "ksyms.h"
     87 
     88 #include <sys/param.h>
     89 #include <sys/systm.h>
     90 #include <sys/rwlock.h>
     91 
     92 #include <uvm/uvm.h>
     93 
     94 #include <machine/pmap.h>
     95 #include <machine/gdt.h>
     96 #include <xen/xenfunc.h>
     97 
     98 #include <dev/isa/isareg.h>
     99 #include <machine/isa_machdep.h>
    100 
    101 #undef	XENDEBUG
    102 /* #define XENDEBUG_SYNC */
    103 /* #define	XENDEBUG_LOW */
    104 
    105 #ifdef XENDEBUG
    106 #define	XENPRINTF(x) printf x
    107 #define	XENPRINTK(x) printk x
    108 #define	XENPRINTK2(x) /* printk x */
    109 
    110 static char XBUF[256];
    111 #else
    112 #define	XENPRINTF(x)
    113 #define	XENPRINTK(x)
    114 #define	XENPRINTK2(x)
    115 #endif
    116 #define	PRINTF(x) printf x
    117 #define	PRINTK(x) printk x
    118 
     119 /* On x86_64 the kernel runs in ring 3, so kernel mappings need the user bit */
    120 #ifdef __x86_64__
    121 #define PG_k PG_u
    122 #else
    123 #define PG_k 0
    124 #endif
    125 
    126 volatile shared_info_t *HYPERVISOR_shared_info;
    127 /* Xen requires the start_info struct to be page aligned */
    128 union start_info_union start_info_union __aligned(PAGE_SIZE);
    129 unsigned long *xpmap_phys_to_machine_mapping;
    130 
     131 /*
     132  * We should prevent the domU from manipulating MFNs while it is
     133  * suspending or migrating, as those MFNs could become invalid once
     134  * the domU resumes operation.
     135  *
     136  * We use a read/write lock for that: a thread that needs to
     137  * manipulate MFNs first acquires a reader lock, performs its MFN
     138  * manipulation, and releases the reader lock when it is done.
     139  *
     140  * The thread responsible for the domU suspension acquires an
     141  * exclusive (writer) lock.
     142  */
    143 static krwlock_t xen_ptom_lock;
    144 
    145 void
    146 xen_init_ptom_lock(void) {
    147 	rw_init(&xen_ptom_lock);
    148 }
    149 
    150 void
    151 xen_release_ptom_lock(void) {
    152 	rw_exit(&xen_ptom_lock);
    153 }
    154 
    155 void
    156 xen_acquire_reader_ptom_lock(void) {
    157 	rw_enter(&xen_ptom_lock, RW_READER);
    158 }
    159 
    160 void
    161 xen_acquire_writer_ptom_lock(void) {
    162 	rw_enter(&xen_ptom_lock, RW_WRITER);
    163 }
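
         /*
          * Usage sketch (illustrative, not lifted from a particular caller):
          * code that looks up or updates MFNs brackets the work with the
          * reader lock:
          *
          *	xen_acquire_reader_ptom_lock();
          *	... MFN lookups / queued MMU updates ...
          *	xen_release_ptom_lock();
          *
          * The suspension path takes the exclusive side with
          * xen_acquire_writer_ptom_lock() and drops it with
          * xen_release_ptom_lock() once the MFNs are stable again.
          */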
    164 
    165 void xen_failsafe_handler(void);
    166 
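         /*
          * Wrapper around the mmu_update hypercall: the Xen3 interface takes
          * a domid argument (always DOMID_SELF here), the older interface
          * does not.
          */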
    167 #ifdef XEN3
    168 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
    169 	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
    170 #else
    171 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
    172 	HYPERVISOR_mmu_update((req), (count), (success_count))
    173 #endif
    174 
    175 void
    176 xen_failsafe_handler(void)
    177 {
    178 
    179 	panic("xen_failsafe_handler called!\n");
    180 }
    181 
    182 
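         /*
          * Point the hypervisor at a new LDT. Xen requires the pages backing
          * the LDT to be mapped read-only, so PG_RW is cleared on each of
          * them before the MMUEXT_SET_LDT operation is queued and flushed.
          */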
    183 void
    184 xen_set_ldt(vaddr_t base, uint32_t entries)
    185 {
    186 	vaddr_t va;
    187 	vaddr_t end;
    188 	pt_entry_t *ptp;
    189 	int s;
    190 
    191 #ifdef __x86_64__
    192 	end = base + (entries << 3);
    193 #else
    194 	end = base + entries * sizeof(union descriptor);
    195 #endif
    196 
    197 #ifdef XEN3
    198 	xen_acquire_reader_ptom_lock();
    199 #endif
    200 
    201 	for (va = base; va < end; va += PAGE_SIZE) {
    202 		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
    203 		ptp = kvtopte(va);
    204 		XENPRINTF(("xen_set_ldt %p %d %p\n", (void *)base,
    205 			      entries, ptp));
    206 		pmap_pte_clearbits(ptp, PG_RW);
    207 	}
    208 	s = splvm();
    209 	xpq_queue_set_ldt(base, entries);
    210 	xpq_flush_queue();
    211 
    212 #ifdef XEN3
    213 	xen_release_ptom_lock();
    214 #endif
    215 
    216 	splx(s);
    217 }
    218 
    219 #ifdef XENDEBUG
    220 void xpq_debug_dump(void);
    221 #endif
    222 
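         /*
          * MMU update queue: mmu_update_t requests are batched here and
          * submitted with a single HYPERVISOR_mmu_update call, either when
          * the queue fills up (XPQUEUE_SIZE entries) or when a caller
          * explicitly runs xpq_flush_queue().
          */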
    223 #define XPQUEUE_SIZE 2048
    224 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
    225 static int xpq_idx = 0;
    226 
    227 void
    228 xpq_flush_queue(void)
    229 {
    230 	int i, ok;
    231 
    232 	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
    233 	for (i = 0; i < xpq_idx; i++)
     234 		XENPRINTK2(("%d: 0x%" PRIx64 " 0x%08" PRIx64 "\n", i,
     235 		    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val));
    236 	if (xpq_idx != 0 &&
    237 	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
    238 		printf("xpq_flush_queue: %d entries \n", xpq_idx);
    239 		for (i = 0; i < xpq_idx; i++)
    240 			printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
    241 			   (uint64_t)xpq_queue[i].ptr,
    242 			   (uint64_t)xpq_queue[i].val);
    243 		panic("HYPERVISOR_mmu_update failed\n");
    244 	}
    245 	xpq_idx = 0;
    246 }
    247 
    248 static inline void
    249 xpq_increment_idx(void)
    250 {
    251 
    252 	xpq_idx++;
    253 	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
    254 		xpq_flush_queue();
    255 }
    256 
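         /*
          * Queue an update of the machine->physical translation table:
          * machine address `ma' is recorded as corresponding to the frame of
          * physical address `pa' (MMU_MACHPHYS_UPDATE).
          */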
    257 void
    258 xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
    259 {
    260 	XENPRINTK2(("xpq_queue_machphys_update ma=0x%" PRIx64 " pa=0x%" PRIx64
    261 	    "\n", (int64_t)ma, (int64_t)pa));
    262 	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
    263 	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
    264 	xpq_increment_idx();
    265 #ifdef XENDEBUG_SYNC
    266 	xpq_flush_queue();
    267 #endif
    268 }
    269 
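         /*
          * Queue a normal page table update: write `val' into the page table
          * entry at machine address `ptr' (MMU_NORMAL_PT_UPDATE).
          */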
    270 void
    271 xpq_queue_pte_update(paddr_t ptr, pt_entry_t val)
    272 {
    273 
    274 	KASSERT((ptr & 3) == 0);
    275 	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
    276 	xpq_queue[xpq_idx].val = val;
    277 	xpq_increment_idx();
    278 #ifdef XENDEBUG_SYNC
    279 	xpq_flush_queue();
    280 #endif
    281 }
    282 
    283 #ifdef XEN3
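
         /*
          * The Xen3 operations below go through HYPERVISOR_mmuext_op (or a
          * direct HYPERVISOR_mmu_update for foreign domains) and take effect
          * immediately, so each of them flushes the pending update queue
          * first to preserve ordering with previously queued PTE updates.
          */
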
    284 void
    285 xpq_queue_pt_switch(paddr_t pa)
    286 {
    287 	struct mmuext_op op;
    288 	xpq_flush_queue();
    289 
    290 	XENPRINTK2(("xpq_queue_pt_switch: 0x%" PRIx64 " 0x%" PRIx64 "\n",
    291 	    (int64_t)pa, (int64_t)pa));
    292 	op.cmd = MMUEXT_NEW_BASEPTR;
    293 	op.arg1.mfn = pa >> PAGE_SHIFT;
    294 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    295 		panic("xpq_queue_pt_switch");
    296 }
    297 
    298 void
    299 xpq_queue_pin_table(paddr_t pa)
    300 {
    301 	struct mmuext_op op;
    302 	xpq_flush_queue();
    303 
    304 	XENPRINTK2(("xpq_queue_pin_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
    305 	    (int64_t)pa, (int64_t)pa));
    306 	op.arg1.mfn = pa >> PAGE_SHIFT;
    307 
    308 #if defined(__x86_64__)
    309 	op.cmd = MMUEXT_PIN_L4_TABLE;
    310 #else
    311 	op.cmd = MMUEXT_PIN_L2_TABLE;
    312 #endif
    313 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    314 		panic("xpq_queue_pin_table");
    315 }
    316 
    317 #ifdef PAE
    318 static void
    319 xpq_queue_pin_l3_table(paddr_t pa)
    320 {
    321 	struct mmuext_op op;
    322 	xpq_flush_queue();
    323 
     324 	XENPRINTK2(("xpq_queue_pin_l3_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
    325 	    (int64_t)pa, (int64_t)pa));
    326 	op.arg1.mfn = pa >> PAGE_SHIFT;
    327 
    328 	op.cmd = MMUEXT_PIN_L3_TABLE;
    329 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
     330 		panic("xpq_queue_pin_l3_table");
    331 }
    332 #endif
    333 
    334 void
    335 xpq_queue_unpin_table(paddr_t pa)
    336 {
    337 	struct mmuext_op op;
    338 	xpq_flush_queue();
    339 
    340 	XENPRINTK2(("xpq_queue_unpin_table: 0x%" PRIx64 " 0x%" PRIx64 "\n",
    341 	    (int64_t)pa, (int64_t)pa));
    342 	op.arg1.mfn = pa >> PAGE_SHIFT;
    343 	op.cmd = MMUEXT_UNPIN_TABLE;
    344 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    345 		panic("xpq_queue_unpin_table");
    346 }
    347 
    348 void
    349 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
    350 {
    351 	struct mmuext_op op;
    352 	xpq_flush_queue();
    353 
    354 	XENPRINTK2(("xpq_queue_set_ldt\n"));
    355 	KASSERT(va == (va & ~PAGE_MASK));
    356 	op.cmd = MMUEXT_SET_LDT;
    357 	op.arg1.linear_addr = va;
    358 	op.arg2.nr_ents = entries;
    359 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    360 		panic("xpq_queue_set_ldt");
    361 }
    362 
    363 void
    364 xpq_queue_tlb_flush(void)
    365 {
    366 	struct mmuext_op op;
    367 	xpq_flush_queue();
    368 
    369 	XENPRINTK2(("xpq_queue_tlb_flush\n"));
    370 	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
    371 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    372 		panic("xpq_queue_tlb_flush");
    373 }
    374 
    375 void
    376 xpq_flush_cache(void)
    377 {
    378 	struct mmuext_op op;
    379 	int s = splvm();
    380 	xpq_flush_queue();
    381 
    382 	XENPRINTK2(("xpq_queue_flush_cache\n"));
    383 	op.cmd = MMUEXT_FLUSH_CACHE;
    384 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    385 		panic("xpq_flush_cache");
    386 	splx(s);
    387 }
    388 
    389 void
    390 xpq_queue_invlpg(vaddr_t va)
    391 {
    392 	struct mmuext_op op;
    393 	xpq_flush_queue();
    394 
    395 	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
    396 	op.cmd = MMUEXT_INVLPG_LOCAL;
    397 	op.arg1.linear_addr = (va & ~PAGE_MASK);
    398 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
    399 		panic("xpq_queue_invlpg");
    400 }
    401 
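         /*
          * Synchronously update the PTE at machine address `ptr' with `val'
          * on behalf of domain `dom'; returns 0 on success or EFAULT if the
          * hypercall fails.
          */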
    402 int
    403 xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom)
    404 {
    405 	mmu_update_t op;
    406 	int ok;
    407 	xpq_flush_queue();
    408 
    409 	op.ptr = ptr;
    410 	op.val = val;
    411 	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
    412 		return EFAULT;
    413 	return (0);
    414 }
    415 #else /* XEN3 */
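
         /*
          * Pre-Xen3 interface: extended commands are encoded as ordinary
          * queue entries (MMU_EXTENDED_COMMAND), so these operations are
          * simply appended to the update queue instead of being issued
          * through a separate mmuext_op hypercall.
          */
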
    416 void
    417 xpq_queue_pt_switch(paddr_t pa)
    418 {
    419 
    420 	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
    421 	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
    422 	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
    423 	xpq_increment_idx();
    424 }
    425 
    426 void
    427 xpq_queue_pin_table(paddr_t pa)
    428 {
    429 
    430 	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
    431 	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
    432 	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
    433 	xpq_increment_idx();
    434 }
    435 
    436 void
    437 xpq_queue_unpin_table(paddr_t pa)
    438 {
    439 
    440 	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
    441 	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
    442 	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
    443 	xpq_increment_idx();
    444 }
    445 
    446 void
    447 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
    448 {
    449 
    450 	XENPRINTK2(("xpq_queue_set_ldt\n"));
    451 	KASSERT(va == (va & ~PAGE_MASK));
    452 	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
    453 	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
    454 	xpq_increment_idx();
    455 }
    456 
    457 void
    458 xpq_queue_tlb_flush(void)
    459 {
    460 
    461 	XENPRINTK2(("xpq_queue_tlb_flush\n"));
    462 	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
    463 	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
    464 	xpq_increment_idx();
    465 }
    466 
    467 void
    468 xpq_flush_cache(void)
    469 {
    470 	int s = splvm();
    471 
    472 	XENPRINTK2(("xpq_queue_flush_cache\n"));
    473 	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
    474 	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
    475 	xpq_increment_idx();
    476 	xpq_flush_queue();
    477 	splx(s);
    478 }
    479 
    480 void
    481 xpq_queue_invlpg(vaddr_t va)
    482 {
    483 
    484 	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
    485 	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
    486 	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
    487 	xpq_increment_idx();
    488 }
    489 
    490 int
    491 xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom)
    492 {
    493 	mmu_update_t xpq_up[3];
    494 
    495 	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
    496 	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
    497 	xpq_up[1].ptr = ptr;
    498 	xpq_up[1].val = val;
    499 	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
    500 		return EFAULT;
    501 	return (0);
    502 }
    503 #endif /* XEN3 */
    504 
    505 #ifdef XENDEBUG
    506 void
    507 xpq_debug_dump(void)
    508 {
    509 	int i;
    510 
    511 	XENPRINTK2(("idx: %d\n", xpq_idx));
    512 	for (i = 0; i < xpq_idx; i++) {
    513 		sprintf(XBUF, "%" PRIx64 " %08" PRIx64,
    514 		    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val);
    515 		if (++i < xpq_idx)
    516 			sprintf(XBUF + strlen(XBUF), "%" PRIx64 " %08" PRIx64,
    517 			    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val);
    518 		if (++i < xpq_idx)
    519 			sprintf(XBUF + strlen(XBUF), "%" PRIx64 " %08" PRIx64,
    520 			    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val);
    521 		if (++i < xpq_idx)
    522 			sprintf(XBUF + strlen(XBUF), "%" PRIx64 " %08" PRIx64,
    523 			    (uint64_t)xpq_queue[i].ptr, (uint64_t)xpq_queue[i].val);
    524 		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
    525 	}
    526 }
    527 #endif
    528 
    529 
    530 extern volatile struct xencons_interface *xencons_interface; /* XXX */
    531 extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
    532 
    533 static void xen_bt_set_readonly (vaddr_t);
    534 static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
    535 
     536 /* How many PDEs? */
    537 #if L2_SLOT_KERNBASE > 0
    538 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
    539 #else
    540 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
    541 #endif
    542 
    543 /*
    544  * Construct and switch to new pagetables
     545  * The value returned (first_avail) is the first vaddr we can use
     546  * once we get rid of the Xen pagetables
    547  */
    548 
    549 vaddr_t xen_pmap_bootstrap (void);
    550 
    551 /*
    552  * Function to get rid of Xen bootstrap tables
    553  */
    554 
     555 /* How many PDP pages do we need: */
    556 #ifdef PAE
    557 /*
     558  * For PAE, we consider a single contiguous L2 "superpage" of 4 pages,
    559  * all of them mapped by the L3 page. We also need a shadow page
    560  * for L3[3].
    561  */
    562 static const int l2_4_count = 6;
    563 #else
    564 static const int l2_4_count = PTP_LEVELS - 1;
    565 #endif
    566 
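         /*
          * Overview: compute how much VA must be mapped (kernel image, UAREA,
          * shared info page, optional ISA I/O space for dom0), derive the
          * number of L2 pages needed, build a temporary set of tables just
          * past the Xen-provided ones, then build the final tables in the
          * space the Xen tables occupied and switch to them, both via
          * xen_bootstrap_tables().
          */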
    567 vaddr_t
    568 xen_pmap_bootstrap(void)
    569 {
    570 	int count, oldcount;
    571 	long mapsize;
    572 	vaddr_t bootstrap_tables, init_tables;
    573 
    574 	xpmap_phys_to_machine_mapping =
    575 	    (unsigned long *)xen_start_info.mfn_list;
    576 	init_tables = xen_start_info.pt_base;
    577 	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
    578 
     579 	/* Space after the Xen bootstrap tables should be free */
    580 	bootstrap_tables = xen_start_info.pt_base +
    581 		(xen_start_info.nr_pt_frames * PAGE_SIZE);
    582 
    583 	/*
     584 	 * Calculate how much space we need:
     585 	 * first, everything mapped before the Xen bootstrap tables
    586 	 */
    587 	mapsize = init_tables - KERNTEXTOFF;
    588 	/* after the tables we'll have:
    589 	 *  - UAREA
    590 	 *  - dummy user PGD (x86_64)
    591 	 *  - HYPERVISOR_shared_info
    592 	 *  - ISA I/O mem (if needed)
    593 	 */
    594 	mapsize += UPAGES * NBPG;
    595 #ifdef __x86_64__
    596 	mapsize += NBPG;
    597 #endif
    598 	mapsize += NBPG;
    599 
    600 #ifdef DOM0OPS
    601 	if (xendomain_is_dom0()) {
    602 		/* space for ISA I/O mem */
    603 		mapsize += IOM_SIZE;
    604 	}
    605 #endif
     606 	/* at this point mapsize doesn't include the table size */
    607 
    608 #ifdef __x86_64__
    609 	count = TABLE_L2_ENTRIES;
    610 #else
    611 	count = (mapsize + (NBPD_L2 -1)) >> L2_SHIFT;
    612 #endif /* __x86_64__ */
    613 
    614 	/* now compute how many L2 pages we need exactly */
    615 	XENPRINTK(("bootstrap_final mapsize 0x%lx count %d\n", mapsize, count));
    616 	while (mapsize + (count + l2_4_count) * PAGE_SIZE + KERNTEXTOFF >
    617 	    ((long)count << L2_SHIFT) + KERNBASE) {
    618 		count++;
    619 	}
    620 #ifndef __x86_64__
    621 	/*
     622 	 * one more L2 page: we'll allocate several pages after kva_start
    623 	 * in pmap_bootstrap() before pmap_growkernel(), which have not been
    624 	 * counted here. It's not a big issue to allocate one more L2 as
    625 	 * pmap_growkernel() will be called anyway.
    626 	 */
    627 	count++;
    628 	nkptp[1] = count;
    629 #endif
    630 
    631 	/*
     632 	 * install bootstrap pages. We may need more L2 pages here than the
     633 	 * final table will have, as they are installed after the final table
    634 	 */
    635 	oldcount = count;
    636 
    637 bootstrap_again:
    638 	XENPRINTK(("bootstrap_again oldcount %d\n", oldcount));
    639 	/*
     640 	 * The Xen space we'll reclaim may not be enough for our new page
     641 	 * tables; move the bootstrap tables further up if necessary
    642 	 */
    643 	if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
    644 		bootstrap_tables = init_tables +
    645 					((count + l2_4_count) * PAGE_SIZE);
    646 	/* make sure we have enough to map the bootstrap_tables */
    647 	if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
    648 	    ((long)oldcount << L2_SHIFT) + KERNBASE) {
    649 		oldcount++;
    650 		goto bootstrap_again;
    651 	}
    652 
    653 	/* Create temporary tables */
    654 	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
    655 		xen_start_info.nr_pt_frames, oldcount, 0);
    656 
    657 	/* Create final tables */
    658 	xen_bootstrap_tables(bootstrap_tables, init_tables,
    659 	    oldcount + l2_4_count, count, 1);
    660 
    661 	/* zero out free space after tables */
    662 	memset((void *)(init_tables + ((count + l2_4_count) * PAGE_SIZE)), 0,
    663 	    (UPAGES + 1) * NBPG);
    664 	return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
    665 }
    666 
    667 
    668 /*
    669  * Build a new table and switch to it
    670  * old_count is # of old tables (including PGD, PDTPE and PDE)
    671  * new_count is # of new tables (PTE only)
    672  * we assume areas don't overlap
    673  */
    674 
    675 
    676 static void
    677 xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
    678 	int old_count, int new_count, int final)
    679 {
    680 	pd_entry_t *pdtpe, *pde, *pte;
    681 	pd_entry_t *cur_pgd, *bt_pgd;
    682 	paddr_t addr;
    683 	vaddr_t page, avail, text_end, map_end;
    684 	int i;
    685 	extern char __data_start;
    686 
    687 	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
    688 	    old_pgd, new_pgd, old_count, new_count));
    689 	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
    690 	/*
    691 	 * size of R/W area after kernel text:
    692 	 *  xencons_interface (if present)
    693 	 *  xenstore_interface (if present)
    694 	 *  table pages (new_count + l2_4_count entries)
    695 	 * extra mappings (only when final is true):
    696 	 *  UAREA
    697 	 *  dummy user PGD (x86_64 only)/gdt page (i386 only)
    698 	 *  HYPERVISOR_shared_info
    699 	 *  ISA I/O mem (if needed)
    700 	 */
    701 	map_end = new_pgd + ((new_count + l2_4_count) * NBPG);
    702 	if (final) {
    703 		map_end += (UPAGES + 1) * NBPG;
    704 		HYPERVISOR_shared_info = (shared_info_t *)map_end;
    705 		map_end += NBPG;
    706 	}
    707 	/*
    708 	 * we always set atdevbase, as it's used by init386 to find the first
    709 	 * available VA. map_end is updated only if we are dom0, so
    710 	 * atdevbase -> atdevbase + IOM_SIZE will be mapped only in
    711 	 * this case.
    712 	 */
    713 	if (final)
    714 		atdevbase = map_end;
    715 #ifdef DOM0OPS
    716 	if (final && xendomain_is_dom0()) {
    717 		/* ISA I/O mem */
    718 		map_end += IOM_SIZE;
    719 	}
    720 #endif /* DOM0OPS */
    721 
    722 	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
    723 	    text_end, map_end));
    724 	__PRINTK(("console 0x%lx ", xen_start_info.console.domU.mfn));
    725 	__PRINTK(("xenstore 0x%lx\n", xen_start_info.store_mfn));
    726 
    727 	/*
    728 	 * Create bootstrap page tables
    729 	 * What we need:
    730 	 * - a PGD (level 4)
    731 	 * - a PDTPE (level 3)
    732 	 * - a PDE (level2)
    733 	 * - some PTEs (level 1)
    734 	 */
    735 
    736 	cur_pgd = (pd_entry_t *) old_pgd;
    737 	bt_pgd = (pd_entry_t *) new_pgd;
    738 	memset (bt_pgd, 0, PAGE_SIZE);
    739 	avail = new_pgd + PAGE_SIZE;
    740 #if PTP_LEVELS > 3
    741 	/* Install level 3 */
    742 	pdtpe = (pd_entry_t *) avail;
    743 	memset (pdtpe, 0, PAGE_SIZE);
    744 	avail += PAGE_SIZE;
    745 
    746 	addr = ((u_long) pdtpe) - KERNBASE;
    747 	bt_pgd[pl4_pi(KERNTEXTOFF)] =
    748 	    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
    749 
    750 	__PRINTK(("L3 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64 " -> L4[0x%x]\n",
    751 	    pdtpe, (uint64_t)addr, (uint64_t)bt_pgd[pl4_pi(KERNTEXTOFF)],
    752 	    pl4_pi(KERNTEXTOFF)));
    753 #else
    754 	pdtpe = bt_pgd;
    755 #endif /* PTP_LEVELS > 3 */
    756 
    757 #if PTP_LEVELS > 2
    758 	/* Level 2 */
    759 	pde = (pd_entry_t *) avail;
    760 	memset(pde, 0, PAGE_SIZE);
    761 	avail += PAGE_SIZE;
    762 
    763 	addr = ((u_long) pde) - KERNBASE;
    764 	pdtpe[pl3_pi(KERNTEXTOFF)] =
    765 	    xpmap_ptom_masked(addr) | PG_k | PG_V | PG_RW;
    766 	__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64 " -> L3[0x%x]\n",
    767 	    pde, (int64_t)addr, (int64_t)pdtpe[pl3_pi(KERNTEXTOFF)],
    768 	    pl3_pi(KERNTEXTOFF)));
    769 #elif defined(PAE)
     770 	/* our PAE-style level 2: 5 contiguous pages (4 L2 + 1 shadow) */
    771 	pde = (pd_entry_t *) avail;
    772 	memset(pde, 0, PAGE_SIZE * 5);
    773 	avail += PAGE_SIZE * 5;
    774 	addr = ((u_long) pde) - KERNBASE;
    775 	/*
    776 	 * enter L2 pages in the L3.
    777 	 * The real L2 kernel PD will be the last one (so that
     778 	 * pde[L2_SLOT_KERN] always points to the shadow).
    779 	 */
    780 	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
    781 		/*
     782 		 * Xen doesn't want R/W mappings in L3 entries; it will add
     783 		 * them itself.
    784 		 */
    785 		pdtpe[i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
    786 		__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
    787 		    " -> L3[0x%x]\n", (vaddr_t)pde + PAGE_SIZE * i,
    788 		    (int64_t)addr, (int64_t)pdtpe[i], i));
    789 	}
    790 	addr += PAGE_SIZE;
    791 	pdtpe[3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
    792 	__PRINTK(("L2 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
    793 	    " -> L3[0x%x]\n", (vaddr_t)pde + PAGE_SIZE * 4,
    794 	    (int64_t)addr, (int64_t)pdtpe[3], 3));
    795 
    796 #else /* PAE */
    797 	pde = bt_pgd;
    798 #endif /* PTP_LEVELS > 2 */
    799 
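         	/*
         	 * Populate the L1 pages: map everything from KERNTEXTOFF up to
         	 * map_end, with kernel text and the old and new page tables
         	 * read-only and the rest read/write. The shared info page, the
         	 * console and xenstore pages and (for dom0) the ISA I/O range
         	 * have their machine frames patched in directly.
         	 */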
    800 	/* Level 1 */
    801 	page = KERNTEXTOFF;
    802 	for (i = 0; i < new_count; i ++) {
    803 		vaddr_t cur_page = page;
    804 
    805 		pte = (pd_entry_t *) avail;
    806 		avail += PAGE_SIZE;
    807 
    808 		memset(pte, 0, PAGE_SIZE);
    809 		while (pl2_pi(page) == pl2_pi (cur_page)) {
    810 			if (page >= map_end) {
    811 				/* not mapped at all */
    812 				pte[pl1_pi(page)] = 0;
    813 				page += PAGE_SIZE;
    814 				continue;
    815 			}
    816 			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
    817 			if (page == (vaddr_t)HYPERVISOR_shared_info) {
    818 				pte[pl1_pi(page)] = xen_start_info.shared_info;
    819 				__PRINTK(("HYPERVISOR_shared_info "
    820 				    "va 0x%lx pte 0x%" PRIx64 "\n",
    821 				    HYPERVISOR_shared_info, (int64_t)pte[pl1_pi(page)]));
    822 			}
    823 #ifdef XEN3
    824 			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
    825 			    == xen_start_info.console.domU.mfn) {
    826 				xencons_interface = (void *)page;
    827 				pte[pl1_pi(page)] = xen_start_info.console.domU.mfn;
    828 				pte[pl1_pi(page)] <<= PAGE_SHIFT;
    829 				__PRINTK(("xencons_interface "
    830 				    "va 0x%lx pte 0x%" PRIx64 "\n",
    831 				    xencons_interface, (int64_t)pte[pl1_pi(page)]));
    832 			}
    833 			if ((xpmap_ptom_masked(page - KERNBASE) >> PAGE_SHIFT)
    834 			    == xen_start_info.store_mfn) {
    835 				xenstore_interface = (void *)page;
    836 				pte[pl1_pi(page)] = xen_start_info.store_mfn;
    837 				pte[pl1_pi(page)] <<= PAGE_SHIFT;
    838 				__PRINTK(("xenstore_interface "
    839 				    "va 0x%lx pte 0x%" PRIx64 "\n",
    840 				    xenstore_interface, (int64_t)pte[pl1_pi(page)]));
    841 			}
    842 #endif /* XEN3 */
    843 #ifdef DOM0OPS
    844 			if (page >= (vaddr_t)atdevbase &&
    845 			    page < (vaddr_t)atdevbase + IOM_SIZE) {
    846 				pte[pl1_pi(page)] =
    847 				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
    848 			}
    849 #endif
    850 			pte[pl1_pi(page)] |= PG_k | PG_V;
    851 			if (page < text_end) {
    852 				/* map kernel text RO */
    853 				pte[pl1_pi(page)] |= 0;
    854 			} else if (page >= old_pgd
    855 			    && page < old_pgd + (old_count * PAGE_SIZE)) {
    856 				/* map old page tables RO */
    857 				pte[pl1_pi(page)] |= 0;
    858 			} else if (page >= new_pgd &&
    859 			    page < new_pgd + ((new_count + l2_4_count) * PAGE_SIZE)) {
    860 				/* map new page tables RO */
    861 				pte[pl1_pi(page)] |= 0;
    862 			} else {
    863 				/* map page RW */
    864 				pte[pl1_pi(page)] |= PG_RW;
    865 			}
    866 
    867 			if ((page  >= old_pgd && page < old_pgd + (old_count * PAGE_SIZE))
    868 			    || page >= new_pgd) {
    869 				__PRINTK(("va 0x%lx pa 0x%lx "
    870 				    "entry 0x%" PRIx64 " -> L1[0x%x]\n",
    871 				    page, page - KERNBASE,
    872 				    (int64_t)pte[pl1_pi(page)], pl1_pi(page)));
    873 			}
    874 			page += PAGE_SIZE;
    875 		}
    876 
    877 		addr = ((u_long) pte) - KERNBASE;
    878 		pde[pl2_pi(cur_page)] =
    879 		    xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
    880 		__PRINTK(("L1 va 0x%lx pa 0x%" PRIx64 " entry 0x%" PRIx64
    881 		    " -> L2[0x%x]\n", pte, (int64_t)addr,
    882 		    (int64_t)pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
    883 		/* Mark readonly */
    884 		xen_bt_set_readonly((vaddr_t) pte);
    885 	}
    886 
    887 	/* Install recursive page tables mapping */
    888 #ifdef PAE
    889 	/*
     890 	 * we need a shadow page for the kernel's L2 page.
     891 	 * The real L2 kernel PD will be the last one (so that
     892 	 * pde[L2_SLOT_KERN] always points to the shadow).
    893 	 */
    894 	memcpy(&pde[L2_SLOT_KERN + NPDPG], &pde[L2_SLOT_KERN], PAGE_SIZE);
    895 	pmap_kl2pd = &pde[L2_SLOT_KERN + NPDPG];
    896 	pmap_kl2paddr = (u_long)pmap_kl2pd - KERNBASE;
    897 
    898 	/*
    899 	 * We don't enter a recursive entry from the L3 PD. Instead,
     900 	 * we enter the first 4 L2 pages, which include the kernel's L2
     901 	 * shadow. But we have to enter the shadow after switching
     902 	 * %cr3, or Xen will refcount some PTEs with the wrong type.
    903 	 */
    904 	addr = (u_long)pde - KERNBASE;
    905 	for (i = 0; i < 3; i++, addr += PAGE_SIZE) {
    906 		pde[PDIR_SLOT_PTE + i] = xpmap_ptom_masked(addr) | PG_k | PG_V;
    907 		__PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
    908 		    (int)(PDIR_SLOT_PTE + i), pde + PAGE_SIZE * i, (long)addr,
    909 		    (int64_t)pde[PDIR_SLOT_PTE + i]));
    910 	}
    911 #if 0
    912 	addr += PAGE_SIZE; /* point to shadow L2 */
    913 	pde[PDIR_SLOT_PTE + 3] = xpmap_ptom_masked(addr) | PG_k | PG_V;
    914 	__PRINTK(("pde[%d] va 0x%lx pa 0x%lx entry 0x%" PRIx64 "\n",
    915 	    (int)(PDIR_SLOT_PTE + 3), pde + PAGE_SIZE * 4, (long)addr,
    916 	    (int64_t)pde[PDIR_SLOT_PTE + 3]));
    917 #endif
     918 	/* Mark tables RO, and pin the kernel's shadow as L2 */
    919 	addr = (u_long)pde - KERNBASE;
    920 	for (i = 0; i < 5; i++, addr += PAGE_SIZE) {
    921 		xen_bt_set_readonly(((vaddr_t)pde) + PAGE_SIZE * i);
    922 		if (i == 2 || i == 3)
    923 			continue;
    924 #if 0
    925 		__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", i, (int64_t)addr));
    926 		xpq_queue_pin_table(xpmap_ptom_masked(addr));
    927 #endif
    928 	}
    929 	if (final) {
    930 		addr = (u_long)pde - KERNBASE + 3 * PAGE_SIZE;
    931 		__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
    932 		xpq_queue_pin_table(xpmap_ptom_masked(addr));
    933 	}
    934 #if 0
    935 	addr = (u_long)pde - KERNBASE + 2 * PAGE_SIZE;
    936 	__PRINTK(("pin L2 %d addr 0x%" PRIx64 "\n", 2, (int64_t)addr));
    937 	xpq_queue_pin_table(xpmap_ptom_masked(addr));
    938 #endif
    939 #else /* PAE */
    940 	/* recursive entry in higher-level PD */
    941 	bt_pgd[PDIR_SLOT_PTE] =
    942 	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
    943 	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%" PRIx64
    944 	    " entry 0x%" PRIx64 "\n", new_pgd, (int64_t)new_pgd - KERNBASE,
    945 	    (int64_t)bt_pgd[PDIR_SLOT_PTE]));
    946 	/* Mark tables RO */
    947 	xen_bt_set_readonly((vaddr_t) pde);
    948 #endif
    949 #if PTP_LEVELS > 2 || defined(PAE)
    950 	xen_bt_set_readonly((vaddr_t) pdtpe);
    951 #endif
    952 #if PTP_LEVELS > 3
    953 	xen_bt_set_readonly(new_pgd);
    954 #endif
    955 	/* Pin the PGD */
     956 	__PRINTK(("pin PGD\n"));
    957 #ifdef PAE
    958 	xpq_queue_pin_l3_table(xpmap_ptom_masked(new_pgd - KERNBASE));
    959 #else
    960 	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
    961 #endif
    962 #ifdef __i386__
    963 	/* Save phys. addr of PDP, for libkvm. */
    964 	PDPpaddr = (long)pde;
    965 #ifdef PAE
    966 	/* also save the address of the L3 page */
    967 	pmap_l3pd = pdtpe;
    968 	pmap_l3paddr = (new_pgd - KERNBASE);
    969 #endif /* PAE */
    970 #endif /* i386 */
    971 	/* Switch to new tables */
     972 	__PRINTK(("switch to PGD\n"));
    973 	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
    974 	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%" PRIx64 "\n",
    975 	    (int64_t)bt_pgd[PDIR_SLOT_PTE]));
    976 #ifdef PAE
    977 	if (final) {
    978 		/* now enter kernel's PTE mappings */
    979 		addr =  (u_long)pde - KERNBASE + PAGE_SIZE * 3;
    980 		xpq_queue_pte_update(
    981 		    xpmap_ptom(((vaddr_t)&pde[PDIR_SLOT_PTE + 3]) - KERNBASE),
    982 		    xpmap_ptom_masked(addr) | PG_k | PG_V);
    983 		xpq_flush_queue();
    984 	}
    985 #endif
    986 
    987 
    988 
    989 	/* Now we can safely reclaim space taken by old tables */
    990 
     991 	__PRINTK(("unpin old PGD\n"));
    992 	/* Unpin old PGD */
    993 	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
    994 	/* Mark old tables RW */
    995 	page = old_pgd;
    996 	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
    997 	addr = xpmap_mtop(addr);
    998 	pte = (pd_entry_t *) ((u_long)addr + KERNBASE);
    999 	pte += pl1_pi(page);
   1000 	__PRINTK(("*pde 0x%" PRIx64 " addr 0x%" PRIx64 " pte 0x%lx\n",
   1001 	    (int64_t)pde[pl2_pi(page)], (int64_t)addr, (long)pte));
   1002 	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
   1003 		addr = xpmap_ptom(((u_long) pte) - KERNBASE);
   1004 		XENPRINTK(("addr 0x%" PRIx64 " pte 0x%lx *pte 0x%" PRIx64 "\n",
   1005 		   (int64_t)addr, (long)pte, (int64_t)*pte));
   1006 		xpq_queue_pte_update(addr, *pte | PG_RW);
   1007 		page += PAGE_SIZE;
   1008 		/*
   1009 		 * Our ptes are contiguous
   1010 		 * so it's safe to just "++" here
   1011 		 */
   1012 		pte++;
   1013 	}
   1014 	xpq_flush_queue();
   1015 }
   1016 
   1017 
   1018 /*
   1019  * Bootstrap helper functions
   1020  */
   1021 
   1022 /*
   1023  * Mark a page readonly
   1024  * XXX: assuming vaddr = paddr + KERNBASE
   1025  */
   1026 
   1027 static void
   1028 xen_bt_set_readonly (vaddr_t page)
   1029 {
   1030 	pt_entry_t entry;
   1031 
   1032 	xen_acquire_reader_ptom_lock();
   1033 
   1034 	entry = xpmap_ptom_masked(page - KERNBASE);
   1035 	entry |= PG_k | PG_V;
   1036 
   1037 	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
   1038 
   1039 	xen_release_ptom_lock();
   1040 }
   1041 
   1042 #ifdef __x86_64__
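
         /*
          * Install a new user-mode page directory base: convert the physical
          * address to an MFN and issue MMUEXT_NEW_USER_BASEPTR, taking the
          * reader lock around the P->M translation.
          */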
   1043 void
   1044 xen_set_user_pgd(paddr_t page)
   1045 {
   1046 	struct mmuext_op op;
   1047 	int s = splvm();
   1048 
   1049 	xpq_flush_queue();
   1050 	op.cmd = MMUEXT_NEW_USER_BASEPTR;
   1051 
   1052 	xen_acquire_reader_ptom_lock();
   1053 
   1054 	op.arg1.mfn = pfn_to_mfn(page >> PAGE_SHIFT);
    1055 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
   1056 		panic("xen_set_user_pgd: failed to install new user page"
   1057 			" directory %lx", page);
   1058 
   1059 	xen_release_ptom_lock();
   1060 
   1061 	splx(s);
   1062 }
   1063 #endif /* __x86_64__ */
   1064