/*	$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.2 2007/11/22 16:17:05 bouyer Exp $");

#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#ifdef XENDEBUG
#define	XENPRINTF(x) printf x
#define	XENPRINTK(x) printk x
#define	XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define	XENPRINTF(x)
#define	XENPRINTK(x)
#define	XENPRINTK2(x)
#endif
#define	PRINTF(x) printf x
#define	PRINTK(x) printk x

volatile shared_info_t *HYPERVISOR_shared_info;
union start_info_union start_info_union;

void xen_failsafe_handler(void);

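/*
 * On XEN3 the mmu_update hypercall takes an extra domain argument;
 * hide the difference behind a common wrapper (DOMID_SELF means the
 * updates apply to the calling domain).
 */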
#ifdef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#else
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#endif

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}

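/*
 * Under Xen the pages backing the GDT/LDT are not writable by the
 * kernel, so a descriptor slot cannot simply be stored to: the new
 * descriptor is handed to the hypervisor together with the address of
 * the slot, taken from the live PTE.
 */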
#ifndef __x86_64__
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	paddr_t pa;
	pt_entry_t *ptp;

	ptp = kvtopte((vaddr_t)table);
	pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
	if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}
#endif

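/*
 * Install an LDT.  Xen only accepts descriptor-table pages that are
 * mapped read-only, so strip PG_RW from the backing pages before
 * queueing the MMUEXT_SET_LDT operation.
 */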
void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp, *maptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
		XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
		    entries, ptp, maptp));
		PTE_CLEARBITS(ptp, maptp, PG_RW);
	}
	s = splvm();
	PTE_UPDATES_FLUSH();

	xpq_queue_set_ldt(base, entries);
	xpq_flush_queue();
	splx(s);
}

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

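/*
 * Page table updates are batched in xpq_queue so that many of them can
 * be applied with a single HYPERVISOR_mmu_update hypercall.  The queue
 * is flushed when it fills up, and explicitly whenever the pending
 * updates must become visible (e.g. before an extended MMU operation).
 */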
#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;

void
xpq_flush_queue()
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries\n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%16lx: 0x%16lx\n",
			    xpq_queue[i].ptr, xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}

static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

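/*
 * A typical caller (sketch, with hypothetical variables) queues a
 * batch of updates and flushes once at the end:
 *
 *	xpq_queue_pte_update(maptp, npte);
 *	...
 *	xpq_flush_queue();
 *
 * xen_set_ldt() above follows this pattern.
 */

/*
 * Record in the hypervisor's machine-to-physical translation table
 * that machine page `ma' now corresponds to pseudo-physical page `pa'.
 */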
void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{
	XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n",
	    (void *)ma, (void *)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

#ifdef XEN3
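/*
 * On XEN3 the extended MMU operations below are issued immediately via
 * HYPERVISOR_mmuext_op() rather than queued; flush the pending update
 * queue first so the hypervisor sees everything in program order.
 */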
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}

void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#ifdef __x86_64__
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}

void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}

void
xpq_queue_tlb_flush()
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}

void
xpq_flush_cache()
{
	struct mmuext_op op;
	int s = splvm();
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;
	xpq_flush_queue();

	op.ptr = (paddr_t)ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
#else /* XEN3 */
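/*
 * On XEN2 there is no separate mmuext hypercall: extended commands go
 * through the same mmu_update queue, with the command encoded in the
 * low bits of `ptr' (MMU_EXTENDED_COMMAND) and the subcommand in `val'.
 */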
void
xpq_queue_pt_switch(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
}

void
xpq_queue_pin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_unpin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
}

void
xpq_queue_tlb_flush()
{

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
}

void
xpq_flush_cache()
{
	int s = splvm();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
	xpq_increment_idx();
	xpq_flush_queue();
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t xpq_up[3];

	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
	xpq_up[1].ptr = (paddr_t)ptr;
	xpq_up[1].val = val;
	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
		return EFAULT;
	return (0);
}
#endif /* XEN3 */

#ifdef XENDEBUG
void
xpq_debug_dump()
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif


#ifdef __x86_64__
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

/*
 * Construct new page tables and switch to them.  The return value is
 * the first vaddr we can use once the Xen bootstrap tables have been
 * reclaimed.
 */

vaddr_t xen_pmap_bootstrap (void);

/*
 * Get rid of the page tables set up by the Xen bootstrap loader,
 * building our own in their place (in two passes; see below).
 */

vaddr_t
xen_pmap_bootstrap()
{
	int count, iocount = 0;
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
	    (xen_start_info.nr_pt_frames * PAGE_SIZE);

	/* Calculate how many tables we need */
	count = TABLE_L2_ENTRIES;

#ifdef DOM0OPS
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		/* space for ISA I/O mem */
		iocount = IOM_SIZE / PAGE_SIZE;
	}
#endif
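	/*
	 * Each table set needs `count' L1 pages plus 3 upper-level pages
	 * (the L4 PGD, the L3 PDTPE and the L2 PDE); that is where the
	 * "count + 3" terms below come from.
	 */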

	/*
	 * Xen space we'll reclaim may not be enough for our new page tables,
	 * move bootstrap tables if necessary
	 */

	if (bootstrap_tables < init_tables + ((count+3+iocount) * PAGE_SIZE))
		bootstrap_tables = init_tables +
		    ((count+3+iocount) * PAGE_SIZE);

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
	    xen_start_info.nr_pt_frames, count, 0);

	/* get vaddr space for the shared info and the console pages */

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    count + 3, count, 1);

	return (init_tables + ((count + 3) * PAGE_SIZE));
}
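
/*
 * Resulting layout (sketch): the final tables are rebuilt at pt_base
 * and span `count' L1 pages plus the L4/L3/L2 pages; the value
 * returned above, pt_base + (count + 3) pages, is the first vaddr
 * handed back to the caller, and the temporary tables built at
 * bootstrap_tables can then be reclaimed.
 */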


/*
 * Build a new table and switch to it
 * old_count is # of old tables (including PGD, PDTPE and PDE)
 * new_count is # of new tables (PTE only)
 * we assume areas don't overlap
 */


static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr, page;
	vaddr_t avail, text_end, map_end;
	int i;
	extern char __data_start;

	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + 3 entries)
	 *  UAREA
	 *  dummy user PGD
	 * extra mappings (only when final is true):
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
	if (final) {
		HYPERVISOR_shared_info = (struct shared_info *)map_end;
		map_end += NBPG;
	}
#ifdef DOM0OPS
	if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
		/* ISA I/O mem */
		atdevbase = map_end;
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));

	/*
	 * Create bootstrap page tables
	 * What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level 2)
	 * - some PTEs (level 1)
	 */

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;

	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));

	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));

	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i++) {
		paddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%lx\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.console_mfn << PAGE_SHIFT)) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.console_mfn << PAGE_SHIFT);
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.store_mfn << PAGE_SHIFT)) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.store_mfn << PAGE_SHIFT);
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_u | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if (page == old_pgd)
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%lx -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			page += PAGE_SIZE;
		}

		addr = ((paddr_t) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
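	/*
	 * The self-referencing slot makes every page table page reachable
	 * through a fixed VA window (L4_BASE, used by the debug prints
	 * below).  It is installed without PG_RW: Xen requires page table
	 * pages to be mapped read-only.
	 */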
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
	    new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));

	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
	xen_bt_set_readonly((vaddr_t) pdtpe);
	xen_bt_set_readonly(new_pgd);
	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
	/* Switch to new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
	    bt_pgd[PDIR_SLOT_PTE]));
	__PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
	__PRINTK(("value 0x%lx\n", *L4_BASE));
	__PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));

	/* Now we can safely reclaim space taken by old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) (addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
	    pde[pl2_pi(page)], addr, pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
		xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our ptes are contiguous
		 * so it's safe to just "++" here
		 */
		pte++;
	}
	xpq_flush_queue();
}

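/*
 * Install a new user-mode PGD.  On amd64, Xen maintains separate
 * kernel and user L4 page tables; MMUEXT_NEW_USER_BASEPTR takes the
 * machine frame number of the user one.
 */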
void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
		    " directory %lx", page);
	splx(s);
}

/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */

static void
xen_bt_set_readonly (vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_u | PG_V;

	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
}
#endif /* x86_64 */