/*	$NetBSD: x86_xpmap.c,v 1.3.12.2 2007/12/15 16:28:29 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3.12.2 2007/12/15 16:28:29 bouyer Exp $");

#include "opt_xen.h"
#include "opt_ddb.h"
#include "ksyms.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#ifdef XENDEBUG
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#endif
#define PRINTF(x) printf x
#define PRINTK(x) printk x

volatile shared_info_t *HYPERVISOR_shared_info;
union start_info_union start_info_union;

void xen_failsafe_handler(void);

#ifdef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#else
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#endif
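
/*
 * Xen 3 added an explicit domain id argument to the mmu_update
 * hypercall; the wrapper above hides that difference and always
 * targets the calling domain (DOMID_SELF).
 */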

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


#ifndef __x86_64__
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	paddr_t pa;
	pt_entry_t *ptp;

	ptp = kvtopte((vaddr_t)table);
	pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
	if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}
#endif
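
/*
 * Descriptor table pages cannot be written directly by the guest under
 * Xen, so xen_update_descriptor() rebuilds the address of the slot from
 * the frame stored in its PTE plus the offset of `table' within the
 * page, and asks the hypervisor to perform the store.
 */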

void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp, *maptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
		XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
		    entries, ptp, maptp));
		PTE_CLEARBITS(ptp, maptp, PG_RW);
	}
	s = splvm();
	PTE_UPDATES_FLUSH();

	xpq_queue_set_ldt(base, entries);
	xpq_flush_queue();
	splx(s);
}
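
/*
 * Before MMUEXT_SET_LDT is queued, xen_set_ldt() strips PG_RW from
 * every page backing the LDT and flushes the pending PTE updates:
 * Xen only accepts an LDT whose pages the guest can no longer write
 * directly.
 */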

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;
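
/*
 * MMU updates are batched in xpq_queue[] and pushed to the hypervisor
 * in bulk by xpq_flush_queue(), either when a caller asks for it
 * explicitly or when the queue fills up (see xpq_increment_idx()).
 * Batching keeps the hypercall count down when many entries change at
 * once; defining XENDEBUG_SYNC above makes every update synchronous
 * instead, which is slower but easier to debug.
 */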

void
xpq_flush_queue(void)
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries\n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
			    (u_int64_t)xpq_queue[i].ptr,
			    (u_int64_t)xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}

static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{
	XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n",
	    (void *)ma, (void *)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}
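
/*
 * Typical use of the update queue, as at the end of
 * xen_bootstrap_tables() below: queue one or more updates, then push
 * them to the hypervisor before the new mappings are relied upon.
 * The names in this sketch are illustrative only; ma0/ma1 stand for
 * the machine addresses of the PTEs being changed (cast to
 * pt_entry_t *) and new_pte0/new_pte1 for the values to store.
 *
 *	xpq_queue_pte_update(ma0, new_pte0);
 *	xpq_queue_pte_update(ma1, new_pte1);
 *	xpq_flush_queue();
 */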

#ifdef XEN3
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}

void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#ifdef __x86_64__
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}
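
/*
 * Pinning asks Xen to validate a frame as a top-level page table and
 * keep it that way (L4 on amd64, L2 on i386, matching the commands
 * above). A pinned table can then be installed with
 * xpq_queue_pt_switch(); it has to be unpinned again before its pages
 * are reused as normal memory, as done for the old tables in
 * xen_bootstrap_tables().
 */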

void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}

void
xpq_queue_tlb_flush(void)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}

void
xpq_flush_cache(void)
{
	struct mmuext_op op;
	int s = splvm();
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;
	xpq_flush_queue();

	op.ptr = (paddr_t)ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
#else /* XEN3 */
void
xpq_queue_pt_switch(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
}

void
xpq_queue_pin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_unpin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
}

void
xpq_queue_tlb_flush(void)
{

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
}

void
xpq_flush_cache(void)
{
	int s = splvm();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
	xpq_increment_idx();
	xpq_flush_queue();
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t xpq_up[3];

	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
	xpq_up[1].ptr = (paddr_t)ptr;
	xpq_up[1].val = val;
	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
		return EFAULT;
	return (0);
}
#endif /* XEN3 */

#ifdef XENDEBUG
void
xpq_debug_dump(void)
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif


extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif
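
/*
 * TABLE_L2_ENTRIES is an upper bound on the number of L2 slots (one
 * PTE page each) needed to map the kernel image, with one entry of
 * slack; it is doubled when the image is also reachable through
 * L2_SLOT_KERNBASE. It only seeds the initial `count' in
 * xen_pmap_bootstrap() on __x86_64__; the exact count is refined there.
 */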

/*
 * Construct and switch to new pagetables.
 * The value returned is the first vaddr we can use once the Xen
 * bootstrap tables have been reclaimed.
 */

vaddr_t xen_pmap_bootstrap (void);

/*
 * Function to get rid of Xen bootstrap tables
 */

vaddr_t
xen_pmap_bootstrap(void)
{
	int count, oldcount;
	long mapsize;
	const int l2_4_count = PTP_LEVELS - 1;
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after the Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
	    (xen_start_info.nr_pt_frames * PAGE_SIZE);

	/*
	 * Calculate how much space we need:
	 * first, everything mapped before the Xen bootstrap tables
	 */
	mapsize = init_tables - KERNTEXTOFF;
	/* after the tables we'll have:
	 *  - UAREA
	 *  - dummy user PGD (x86_64)
	 *  - ISA MEM space
	 *  - HYPERVISOR_shared_info
	 *  - ISA I/O mem (if needed)
	 */
	mapsize += UPAGES * NBPG;
#ifdef __x86_64__
	mapsize += NBPG;
#endif
	mapsize += NBPG;

#ifdef DOM0OPS
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		/* space for ISA I/O mem */
		mapsize += IOM_SIZE;
	}
#endif
	/* at this point mapsize doesn't include the table size */

#ifdef __x86_64__
	count = TABLE_L2_ENTRIES;
#else
	count = (mapsize + (NBPD_L2 - 1)) >> L2_SHIFT;
#endif /* __x86_64__ */

	/* now compute how many L2 pages we need exactly */
	printk("bootstrap_final count %d\n", count);
	while (mapsize + (count + l2_4_count) * PAGE_SIZE >
	    ((long)count << L2_SHIFT) + KERNTEXTOFF) {
		count++;
	}
#ifndef __x86_64__
	nkptp[1] = count;
#endif

	/*
	 * Install the bootstrap page tables. The bootstrap set may need
	 * more L2 pages than the final one, because it is installed
	 * above (after) the final tables and thus has to map them too.
	 */
	oldcount = count;

bootstrap_again:
	printk("bootstrap_again oldcount %d\n", oldcount);
	/*
	 * The Xen space we'll reclaim may not be enough for our new page
	 * tables; move the bootstrap tables up if necessary.
	 */
	if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
		bootstrap_tables = init_tables +
		    ((count + l2_4_count) * PAGE_SIZE);
	/* make sure we have enough to map the bootstrap_tables */
	if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
	    ((long)oldcount << L2_SHIFT) + KERNTEXTOFF) {
		oldcount++;
		goto bootstrap_again;
	}

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
	    xen_start_info.nr_pt_frames, oldcount, 0);

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    oldcount + l2_4_count, count, 1);

	return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
}
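
/*
 * Note on the two passes above: the first xen_bootstrap_tables() call
 * builds a throw-away set of tables just past the ones Xen handed us,
 * so the kernel stops depending on Xen's bootstrap tables; the second
 * call then builds the final tables in the space reclaimed at
 * init_tables and switches to them.
 */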


/*
 * Build a new table and switch to it.
 * old_count is # of old tables (including PGD, PDTPE and PDE)
 * new_count is # of new tables (PTE only)
 * We assume the areas don't overlap.
 */


static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
	int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr, page;
	vaddr_t avail, text_end, map_end;
	int i;
	extern char __data_start;

	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + 3 entries)
	 * extra mappings (only when final is true):
	 *  UAREA
	 *  dummy user PGD (x86_64 only)
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + 3) * NBPG);
	if (final) {
		map_end += (UPAGES + 1) * NBPG;
		HYPERVISOR_shared_info = (struct shared_info *)map_end;
		map_end += NBPG;
	}
#ifdef DOM0OPS
	if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
		/* ISA I/O mem */
		atdevbase = map_end;
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));

	/*
	 * Create bootstrap page tables
	 * What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level 2)
	 * - some PTEs (level 1)
	 */

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;
#if PTP_LEVELS > 3
	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
#else
	pdtpe = bt_pgd;
#endif /* PTP_LEVELS > 3 */

#if PTP_LEVELS > 2
	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
#else
	pde = bt_pgd;
#endif /* PTP_LEVELS > 2 */

	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i ++) {
		paddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%lx\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.console_mfn << PAGE_SHIFT)) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.console_mfn << PAGE_SHIFT);
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.store_mfn << PAGE_SHIFT)) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.store_mfn << PAGE_SHIFT);
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_u | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if (page == old_pgd)
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%lx -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			page += PAGE_SIZE;
		}

		addr = ((paddr_t) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
	    new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));

	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
#if PTP_LEVELS > 2
	xen_bt_set_readonly((vaddr_t) pdtpe);
#endif
#if PTP_LEVELS > 3
	xen_bt_set_readonly(new_pgd);
#endif
	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
	/* Switch to the new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
	    bt_pgd[PDIR_SLOT_PTE]));

	/* Now we can safely reclaim the space taken by the old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) (addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
	    pde[pl2_pi(page)], addr, pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
		printk("addr 0x%lx pte 0x%lx *pte 0x%lx\n",
		    addr, pte, *pte);
		xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our PTEs are contiguous, so it's safe to just "++" here.
		 */
		pte++;
	}
	xpq_flush_queue();
}
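
/*
 * Two details in the function above are dictated by Xen: every frame
 * that becomes part of a page table is mapped without PG_RW, since the
 * hypervisor refuses to pin tables the guest could still write
 * directly, and the PDIR_SLOT_PTE entry maps the new PGD onto itself
 * to provide the usual recursive page-table view.
 */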


/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */

static void
xen_bt_set_readonly (vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_u | PG_V;

	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
}
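
/*
 * xen_bt_set_readonly() rewrites the PTE for `page' through
 * HYPERVISOR_update_va_mapping() with PG_RW left out; the
 * vaddr = paddr + KERNBASE assumption noted above is what allows the
 * machine frame to be looked up with xpmap_ptom_masked(page - KERNBASE).
 */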

#ifdef __x86_64__
void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
		    " directory %lx", page);
	splx(s);
}
#endif /* __x86_64__ */
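
/*
 * On amd64, Xen maintains separate kernel and user L4 page tables;
 * xen_set_user_pgd() installs the user one via MMUEXT_NEW_USER_BASEPTR.
 * The argument is a physical address, translated to a machine frame
 * through xpmap_phys_to_machine_mapping[] because the hypercall expects
 * an mfn.
 */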