x86_xpmap.c revision 1.3 1 /* $NetBSD: x86_xpmap.c,v 1.3 2007/11/23 09:54:33 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * Copyright (c) 2006, 2007 Manuel Bouyer.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * This product includes software developed by Manuel Bouyer.
33 * 4. The name of the author may not be used to endorse or promote products
34 * derived from this software without specific prior written permission.
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
37 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
40 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
45 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 *
47 */
48
49 /*
50 *
51 * Copyright (c) 2004 Christian Limpach.
52 * All rights reserved.
53 *
54 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted provided that the following conditions
56 * are met:
57 * 1. Redistributions of source code must retain the above copyright
58 * notice, this list of conditions and the following disclaimer.
59 * 2. Redistributions in binary form must reproduce the above copyright
60 * notice, this list of conditions and the following disclaimer in the
61 * documentation and/or other materials provided with the distribution.
62 * 3. All advertising materials mentioning features or use of this software
63 * must display the following acknowledgement:
64 * This product includes software developed by Christian Limpach.
65 * 4. The name of the author may not be used to endorse or promote products
66 * derived from this software without specific prior written permission.
67 *
68 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
69 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
70 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
71 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
72 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
73 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
74 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
75 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
76 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
77 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
78 */
79
80
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3 2007/11/23 09:54:33 bouyer Exp $");
83
84 #include "opt_xen.h"
85
86 #include <sys/param.h>
87 #include <sys/systm.h>
88
89 #include <uvm/uvm.h>
90
91 #include <machine/pmap.h>
92 #include <machine/gdt.h>
93 #include <xen/xenfunc.h>
94
95 #include <dev/isa/isareg.h>
96 #include <machine/isa_machdep.h>
97
/*
 * Debug switches.  XENDEBUG is forced off here.  When XENDEBUG_SYNC is
 * defined, the xpq_queue_*_update() helpers in this file flush the
 * update queue after every single entry.
 */
#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

/*
 * The XENPRINT* macros take one parenthesised printf-style argument
 * list, e.g. XENPRINTF(("fmt %d\n", v)).
 */
#ifndef XENDEBUG
/* Debugging disabled: every debug print expands to nothing. */
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#else
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

/* Scratch buffer used by xpq_debug_dump(). */
static char XBUF[256];
#endif

/* Unconditional variants of the wrappers above. */
#define PRINTF(x) printf x
#define PRINTK(x) printk x
115
116 volatile shared_info_t *HYPERVISOR_shared_info;
117 union start_info_union start_info_union;
118
119 void xen_failsafe_handler(void);
120
121 #ifdef XEN3
122 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
123 HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
124 #else
125 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
126 HYPERVISOR_mmu_update((req), (count), (success_count))
127 #endif
128
/*
 * Failsafe callback entry point: it is never expected to run, so
 * reaching it is fatal.
 */
void
xen_failsafe_handler(void)
{
	panic("xen_failsafe_handler called!\n");
}
135
136
137 #ifndef __x86_64__
138 void
139 xen_update_descriptor(union descriptor *table, union descriptor *entry)
140 {
141 paddr_t pa;
142 pt_entry_t *ptp;
143
144 ptp = kvtopte((vaddr_t)table);
145 pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
146 if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
147 panic("HYPERVISOR_update_descriptor failed\n");
148 }
149 #endif
150
151 void
152 xen_set_ldt(vaddr_t base, uint32_t entries)
153 {
154 vaddr_t va;
155 vaddr_t end;
156 pt_entry_t *ptp, *maptp;
157 int s;
158
159 #ifdef __x86_64__
160 end = base + (entries << 3);
161 #else
162 end = base + entries * sizeof(union descriptor);
163 #endif
164
165 for (va = base; va < end; va += PAGE_SIZE) {
166 KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
167 ptp = kvtopte(va);
168 maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
169 XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
170 entries, ptp, maptp));
171 PTE_CLEARBITS(ptp, maptp, PG_RW);
172 }
173 s = splvm();
174 PTE_UPDATES_FLUSH();
175
176 xpq_queue_set_ldt(base, entries);
177 xpq_flush_queue();
178 splx(s);
179 }
180
181 #ifdef XENDEBUG
182 void xpq_debug_dump(void);
183 #endif
184
185 #define XPQUEUE_SIZE 2048
186 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
187 static int xpq_idx = 0;
188
189 void
190 xpq_flush_queue()
191 {
192 int i, ok;
193
194 XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
195 for (i = 0; i < xpq_idx; i++)
196 XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
197 (u_int)xpq_queue[i].val));
198 if (xpq_idx != 0 &&
199 HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
200 printf("xpq_flush_queue: %d entries \n", xpq_idx);
201 for (i = 0; i < xpq_idx; i++)
202 printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
203 (u_int64_t)xpq_queue[i].ptr,
204 (u_int64_t)xpq_queue[i].val);
205 panic("HYPERVISOR_mmu_update failed\n");
206 }
207 xpq_idx = 0;
208 }
209
210 static inline void
211 xpq_increment_idx(void)
212 {
213
214 xpq_idx++;
215 if (__predict_false(xpq_idx == XPQUEUE_SIZE))
216 xpq_flush_queue();
217 }
218
219 void
220 xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
221 {
222 XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
223 xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
224 xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
225 xpq_increment_idx();
226 #ifdef XENDEBUG_SYNC
227 xpq_flush_queue();
228 #endif
229 }
230
231 void
232 xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
233 {
234
235 KASSERT(((paddr_t)ptr & 3) == 0);
236 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
237 xpq_queue[xpq_idx].val = val;
238 xpq_increment_idx();
239 #ifdef XENDEBUG_SYNC
240 xpq_flush_queue();
241 #endif
242 }
243
244 void
245 xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
246 {
247
248 KASSERT(((paddr_t)ptr & 3) == 0);
249 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
250 xpq_queue[xpq_idx].val = val;
251 xpq_increment_idx();
252 #ifdef XENDEBUG_SYNC
253 xpq_flush_queue();
254 #endif
255 }
256
257 #ifdef XEN3
258 void
259 xpq_queue_pt_switch(paddr_t pa)
260 {
261 struct mmuext_op op;
262 xpq_flush_queue();
263
264 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
265 op.cmd = MMUEXT_NEW_BASEPTR;
266 op.arg1.mfn = pa >> PAGE_SHIFT;
267 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
268 panic("xpq_queue_pt_switch");
269 }
270
271 void
272 xpq_queue_pin_table(paddr_t pa)
273 {
274 struct mmuext_op op;
275 xpq_flush_queue();
276
277 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
278 op.arg1.mfn = pa >> PAGE_SHIFT;
279
280 #ifdef __x86_64__
281 op.cmd = MMUEXT_PIN_L4_TABLE;
282 #else
283 op.cmd = MMUEXT_PIN_L2_TABLE;
284 #endif
285 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
286 panic("xpq_queue_pin_table");
287 }
288
289 void
290 xpq_queue_unpin_table(paddr_t pa)
291 {
292 struct mmuext_op op;
293 xpq_flush_queue();
294
295 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
296 op.arg1.mfn = pa >> PAGE_SHIFT;
297 op.cmd = MMUEXT_UNPIN_TABLE;
298 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
299 panic("xpq_queue_unpin_table");
300 }
301
302 void
303 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
304 {
305 struct mmuext_op op;
306 xpq_flush_queue();
307
308 XENPRINTK2(("xpq_queue_set_ldt\n"));
309 KASSERT(va == (va & ~PAGE_MASK));
310 op.cmd = MMUEXT_SET_LDT;
311 op.arg1.linear_addr = va;
312 op.arg2.nr_ents = entries;
313 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
314 panic("xpq_queue_set_ldt");
315 }
316
317 void
318 xpq_queue_tlb_flush()
319 {
320 struct mmuext_op op;
321 xpq_flush_queue();
322
323 XENPRINTK2(("xpq_queue_tlb_flush\n"));
324 op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
325 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
326 panic("xpq_queue_tlb_flush");
327 }
328
329 void
330 xpq_flush_cache()
331 {
332 struct mmuext_op op;
333 int s = splvm();
334 xpq_flush_queue();
335
336 XENPRINTK2(("xpq_queue_flush_cache\n"));
337 op.cmd = MMUEXT_FLUSH_CACHE;
338 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
339 panic("xpq_flush_cache");
340 splx(s);
341 }
342
343 void
344 xpq_queue_invlpg(vaddr_t va)
345 {
346 struct mmuext_op op;
347 xpq_flush_queue();
348
349 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
350 op.cmd = MMUEXT_INVLPG_LOCAL;
351 op.arg1.linear_addr = (va & ~PAGE_MASK);
352 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
353 panic("xpq_queue_invlpg");
354 }
355
356 int
357 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
358 {
359 mmu_update_t op;
360 int ok;
361 xpq_flush_queue();
362
363 op.ptr = (paddr_t)ptr;
364 op.val = val;
365 if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
366 return EFAULT;
367 return (0);
368 }
369 #else /* XEN3 */
370 void
371 xpq_queue_pt_switch(paddr_t pa)
372 {
373
374 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
375 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
376 xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
377 xpq_increment_idx();
378 }
379
380 void
381 xpq_queue_pin_table(paddr_t pa)
382 {
383
384 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
385 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
386 xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
387 xpq_increment_idx();
388 }
389
390 void
391 xpq_queue_unpin_table(paddr_t pa)
392 {
393
394 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
395 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
396 xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
397 xpq_increment_idx();
398 }
399
400 void
401 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
402 {
403
404 XENPRINTK2(("xpq_queue_set_ldt\n"));
405 KASSERT(va == (va & ~PAGE_MASK));
406 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
407 xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
408 xpq_increment_idx();
409 }
410
411 void
412 xpq_queue_tlb_flush()
413 {
414
415 XENPRINTK2(("xpq_queue_tlb_flush\n"));
416 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
417 xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
418 xpq_increment_idx();
419 }
420
421 void
422 xpq_flush_cache()
423 {
424 int s = splvm();
425
426 XENPRINTK2(("xpq_queue_flush_cache\n"));
427 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
428 xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
429 xpq_increment_idx();
430 xpq_flush_queue();
431 splx(s);
432 }
433
434 void
435 xpq_queue_invlpg(vaddr_t va)
436 {
437
438 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
439 xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
440 xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
441 xpq_increment_idx();
442 }
443
444 int
445 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
446 {
447 mmu_update_t xpq_up[3];
448
449 xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
450 xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
451 xpq_up[1].ptr = (paddr_t)ptr;
452 xpq_up[1].val = val;
453 if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
454 return EFAULT;
455 return (0);
456 }
457 #endif /* XEN3 */
458
459 #ifdef XENDEBUG
460 void
461 xpq_debug_dump()
462 {
463 int i;
464
465 XENPRINTK2(("idx: %d\n", xpq_idx));
466 for (i = 0; i < xpq_idx; i++) {
467 sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
468 (u_int)xpq_queue[i].val);
469 if (++i < xpq_idx)
470 sprintf(XBUF + strlen(XBUF), "%x %08x ",
471 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
472 if (++i < xpq_idx)
473 sprintf(XBUF + strlen(XBUF), "%x %08x ",
474 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
475 if (++i < xpq_idx)
476 sprintf(XBUF + strlen(XBUF), "%x %08x ",
477 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
478 XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
479 }
480 }
481 #endif
482
483
484 #ifdef __x86_64__
485 extern volatile struct xencons_interface *xencons_interface; /* XXX */
486 extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
487
488 static void xen_bt_set_readonly (vaddr_t);
489 static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
490
491 /* How many PDEs ? */
492 #if L2_SLOT_KERNBASE > 0
493 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
494 #else
495 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
496 #endif
497
498 /*
499 * Construct and switch to new pagetables
500 * first_avail is the first vaddr we can use after
501 * we get rid of Xen pagetables
502 */
503
504 vaddr_t xen_pmap_bootstrap (void);
505
506 /*
507 * Function to get rid of Xen bootstrap tables
508 */
509
510 vaddr_t
511 xen_pmap_bootstrap()
512 {
513 int count, iocount = 0;
514 vaddr_t bootstrap_tables, init_tables;
515
516 xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
517 init_tables = xen_start_info.pt_base;
518 __PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
519
520 /* Space after Xen boostrap tables should be free */
521 bootstrap_tables = xen_start_info.pt_base +
522 (xen_start_info.nr_pt_frames * PAGE_SIZE);
523
524 /* Calculate how many tables we need */
525 count = TABLE_L2_ENTRIES;
526
527 #ifdef DOM0OPS
528 if (xen_start_info.flags & SIF_INITDOMAIN) {
529 /* space for ISA I/O mem */
530 iocount = IOM_SIZE / PAGE_SIZE;
531 }
532 #endif
533
534 /*
535 * Xen space we'll reclaim may not be enough for our new page tables,
536 * move bootstrap tables if necessary
537 */
538
539 if (bootstrap_tables < init_tables + ((count+3+iocount) * PAGE_SIZE))
540 bootstrap_tables = init_tables +
541 ((count+3+iocount) * PAGE_SIZE);
542
543 /* Create temporary tables */
544 xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
545 xen_start_info.nr_pt_frames, count, 0);
546
547 /* get vaddr space for the shared info and the console pages */
548
549 /* Create final tables */
550 xen_bootstrap_tables(bootstrap_tables, init_tables,
551 count + 3, count, 1);
552
553 return (init_tables + ((count + 3) * PAGE_SIZE));
554 }
555
556
557 /*
558 * Build a new table and switch to it
559 * old_count is # of old tables (including PGD, PDTPE and PDE)
560 * new_count is # of new tables (PTE only)
561 * we assume areas don't overlap
562 */
563
564
565 static void
566 xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
567 int old_count, int new_count, int final)
568 {
569 pd_entry_t *pdtpe, *pde, *pte;
570 pd_entry_t *cur_pgd, *bt_pgd;
571 paddr_t addr, page;
572 vaddr_t avail, text_end, map_end;
573 int i;
574 extern char __data_start;
575
576 __PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
577 old_pgd, new_pgd, old_count, new_count));
578 text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
579 /*
580 * size of R/W area after kernel text:
581 * xencons_interface (if present)
582 * xenstore_interface (if present)
583 * table pages (new_count + 3 entries)
584 * UAREA
585 * dummy user PGD
586 * extra mappings (only when final is true):
587 * HYPERVISOR_shared_info
588 * ISA I/O mem (if needed)
589 */
590 map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
591 if (final) {
592 HYPERVISOR_shared_info = (struct shared_info *)map_end;
593 map_end += NBPG;
594 }
595 #ifdef DOM0OPS
596 if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
597 /* ISA I/O mem */
598 atdevbase = map_end;
599 map_end += IOM_SIZE;
600 }
601 #endif /* DOM0OPS */
602
603 __PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
604 text_end, map_end));
605
606 /*
607 * Create bootstrap page tables
608 * What we need:
609 * - a PGD (level 4)
610 * - a PDTPE (level 3)
611 * - a PDE (level2)
612 * - some PTEs (level 1)
613 */
614
615 cur_pgd = (pd_entry_t *) old_pgd;
616 bt_pgd = (pd_entry_t *) new_pgd;
617 memset (bt_pgd, 0, PAGE_SIZE);
618 avail = new_pgd + PAGE_SIZE;
619
620 /* Install level 3 */
621 pdtpe = (pd_entry_t *) avail;
622 memset (pdtpe, 0, PAGE_SIZE);
623 avail += PAGE_SIZE;
624
625 addr = ((paddr_t) pdtpe) - KERNBASE;
626 bt_pgd[pl4_pi(KERNTEXTOFF)] =
627 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
628
629 __PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
630 pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
631
632 /* Level 2 */
633 pde = (pd_entry_t *) avail;
634 memset(pde, 0, PAGE_SIZE);
635 avail += PAGE_SIZE;
636
637 addr = ((paddr_t) pde) - KERNBASE;
638 pdtpe[pl3_pi(KERNTEXTOFF)] =
639 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
640 __PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
641 pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
642
643 /* Level 1 */
644 page = KERNTEXTOFF;
645 for (i = 0; i < new_count; i ++) {
646 paddr_t cur_page = page;
647
648 pte = (pd_entry_t *) avail;
649 avail += PAGE_SIZE;
650
651 memset(pte, 0, PAGE_SIZE);
652 while (pl2_pi(page) == pl2_pi (cur_page)) {
653 if (page >= map_end) {
654 /* not mapped at all */
655 pte[pl1_pi(page)] = 0;
656 page += PAGE_SIZE;
657 continue;
658 }
659 pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
660 if (page == (vaddr_t)HYPERVISOR_shared_info) {
661 pte[pl1_pi(page)] = xen_start_info.shared_info;
662 __PRINTK(("HYPERVISOR_shared_info "
663 "va 0x%lx pte 0x%lx\n",
664 HYPERVISOR_shared_info, pte[pl1_pi(page)]));
665 }
666 if (xpmap_ptom_masked(page - KERNBASE) ==
667 (xen_start_info.console_mfn << PAGE_SHIFT)) {
668 xencons_interface = (void *)page;
669 pte[pl1_pi(page)] =
670 (xen_start_info.console_mfn << PAGE_SHIFT);
671 __PRINTK(("xencons_interface "
672 va 0x%lx pte 0x%lx\n",
673 xencons_interface, pte[pl1_pi(page)]));
674 }
675 if (xpmap_ptom_masked(page - KERNBASE) ==
676 (xen_start_info.store_mfn << PAGE_SHIFT)) {
677 xenstore_interface = (void *)page;
678 pte[pl1_pi(page)] =
679 (xen_start_info.store_mfn << PAGE_SHIFT);
680 __PRINTK(("xenstore_interface "
681 "va 0x%lx pte 0x%lx\n",
682 xenstore_interface, pte[pl1_pi(page)]));
683 }
684 #ifdef DOM0OPS
685 if (page >= (vaddr_t)atdevbase &&
686 page < (vaddr_t)atdevbase + IOM_SIZE) {
687 pte[pl1_pi(page)] =
688 IOM_BEGIN + (page - (vaddr_t)atdevbase);
689 }
690 #endif
691 pte[pl1_pi(page)] |= PG_u | PG_V;
692 if (page < text_end) {
693 /* map kernel text RO */
694 pte[pl1_pi(page)] |= 0;
695 } else if (page >= old_pgd
696 && page < old_pgd + (old_count * PAGE_SIZE)) {
697 /* map old page tables RO */
698 pte[pl1_pi(page)] |= 0;
699 } else if (page >= new_pgd &&
700 page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
701 /* map new page tables RO */
702 pte[pl1_pi(page)] |= 0;
703 } else {
704 /* map page RW */
705 pte[pl1_pi(page)] |= PG_RW;
706 }
707 if (page == old_pgd)
708 __PRINTK(("va 0x%lx pa 0x%lx
709 "entry 0x%lx -> L1[0x%x]\n",
710 page, page - KERNBASE,
711 pte[pl1_pi(page)], pl1_pi(page)));
712 page += PAGE_SIZE;
713 }
714
715 addr = ((paddr_t) pte) - KERNBASE;
716 pde[pl2_pi(cur_page)] =
717 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
718 __PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
719 pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
720 /* Mark readonly */
721 xen_bt_set_readonly((vaddr_t) pte);
722 }
723
724 /* Install recursive page tables mapping */
725 bt_pgd[PDIR_SLOT_PTE] =
726 xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
727 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
728 new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));
729
730 /* Mark tables RO */
731 xen_bt_set_readonly((vaddr_t) pde);
732 xen_bt_set_readonly((vaddr_t) pdtpe);
733 xen_bt_set_readonly(new_pgd);
734 /* Pin the PGD */
735 __PRINTK(("pin PDG\n"));
736 xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
737 /* Switch to new tables */
738 __PRINTK(("switch to PDG\n"));
739 xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
740 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
741 bt_pgd[PDIR_SLOT_PTE]));
742 __PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
743 __PRINTK(("value 0x%lx\n", *L4_BASE));
744 __PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));
745
746 /* Now we can safely reclaim space taken by old tables */
747
748 __PRINTK(("unpin old PDG\n"));
749 /* Unpin old PGD */
750 xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
751 /* Mark old tables RW */
752 page = old_pgd;
753 addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
754 addr = xpmap_mtop(addr);
755 pte = (pd_entry_t *) (addr + KERNBASE);
756 pte += pl1_pi(page);
757 __PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
758 pde[pl2_pi(page)], addr, pte));
759 while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
760 addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
761 xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
762 page += PAGE_SIZE;
763 /*
764 * Our ptes are contiguous
765 * so it's safe to just "++" here
766 */
767 pte++;
768 }
769 xpq_flush_queue();
770 }
771
772
773 void
774 xen_set_user_pgd(paddr_t page)
775 {
776 struct mmuext_op op;
777 int s = splvm();
778
779 xpq_flush_queue();
780 op.cmd = MMUEXT_NEW_USER_BASEPTR;
781 op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
782 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
783 panic("xen_set_user_pgd: failed to install new user page"
784 " directory %lx", page);
785 splx(s);
786 }
787
788 /*
789 * Bootstrap helper functions
790 */
791
792 /*
793 * Mark a page readonly
794 * XXX: assuming vaddr = paddr + KERNBASE
795 */
796
797 static void
798 xen_bt_set_readonly (vaddr_t page)
799 {
800 pt_entry_t entry;
801
802 entry = xpmap_ptom_masked(page - KERNBASE);
803 entry |= PG_u | PG_V;
804
805 HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
806 }
807 #endif /* x86_64 */
808