/*	$NetBSD: x86_xpmap.c,v 1.3.12.1 2007/12/11 23:03:02 bouyer Exp $	*/

/*
 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Copyright (c) 2006, 2007 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Manuel Bouyer.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3.12.1 2007/12/11 23:03:02 bouyer Exp $");

#include "opt_xen.h"
#include "opt_ddb.h"
#include "ksyms.h"

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm.h>

#include <machine/pmap.h>
#include <machine/gdt.h>
#include <xen/xenfunc.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#ifdef XENDEBUG
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

static char XBUF[256];
#else
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#endif
#define PRINTF(x) printf x
#define PRINTK(x) printk x

volatile shared_info_t *HYPERVISOR_shared_info;
union start_info_union start_info_union;

void xen_failsafe_handler(void);

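/*
 * On Xen 3 the mmu_update hypercall takes an explicit target domain;
 * wrap it so that the queue-flush code below can be shared with the
 * older interface.
 */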
#ifdef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#else
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#endif

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


#ifndef __x86_64__
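/*
 * Update a descriptor table entry through the hypervisor: under Xen the
 * GDT/LDT pages are mapped read-only, so the entry cannot be written
 * directly.  Note that the PTE already holds a machine frame, so the
 * address handed to HYPERVISOR_update_descriptor is a machine address.
 */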
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
	paddr_t pa;
	pt_entry_t *ptp;

	ptp = kvtopte((vaddr_t)table);
	pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
	if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
		panic("HYPERVISOR_update_descriptor failed\n");
}
#endif

void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp, *maptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
		XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
		    entries, ptp, maptp));
		PTE_CLEARBITS(ptp, maptp, PG_RW);
	}
	s = splvm();
	PTE_UPDATES_FLUSH();

	xpq_queue_set_ldt(base, entries);
	xpq_flush_queue();
	splx(s);
}

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

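/*
 * MMU updates are not issued to the hypervisor one by one; they are
 * accumulated in xpq_queue and handed to HYPERVISOR_mmu_update in a
 * single batch, either when the queue fills up or when a caller needs
 * the updates to take effect (xpq_flush_queue).
 */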
#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static int xpq_idx = 0;

void
xpq_flush_queue()
{
	int i, ok;

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val));
	if (xpq_idx != 0 &&
	    HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
		printf("xpq_flush_queue: %d entries \n", xpq_idx);
		for (i = 0; i < xpq_idx; i++)
			printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
			    (u_int64_t)xpq_queue[i].ptr,
			    (u_int64_t)xpq_queue[i].val);
		panic("HYPERVISOR_mmu_update failed\n");
	}
	xpq_idx = 0;
}

static inline void
xpq_increment_idx(void)
{

	xpq_idx++;
	if (__predict_false(xpq_idx == XPQUEUE_SIZE))
		xpq_flush_queue();
}

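/*
 * Queue an update of Xen's machine-to-physical translation table:
 * machine frame 'ma' will map back to the pseudo-physical frame
 * derived from 'pa'.
 */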
void
xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
{
	XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
	xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
	xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

void
xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
{

	KASSERT(((paddr_t)ptr & 3) == 0);
	xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
	xpq_queue[xpq_idx].val = val;
	xpq_increment_idx();
#ifdef XENDEBUG_SYNC
	xpq_flush_queue();
#endif
}

#ifdef XEN3
void
xpq_queue_pt_switch(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pa >> PAGE_SHIFT;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pt_switch");
}

void
xpq_queue_pin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;

#ifdef __x86_64__
	op.cmd = MMUEXT_PIN_L4_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_pin_table");
}

void
xpq_queue_unpin_table(paddr_t pa)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	op.arg1.mfn = pa >> PAGE_SHIFT;
	op.cmd = MMUEXT_UNPIN_TABLE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_unpin_table");
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = va;
	op.arg2.nr_ents = entries;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_set_ldt");
}

void
xpq_queue_tlb_flush()
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_tlb_flush");
}

void
xpq_flush_cache()
{
	struct mmuext_op op;
	int s = splvm();
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	op.cmd = MMUEXT_FLUSH_CACHE;
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_flush_cache");
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{
	struct mmuext_op op;
	xpq_flush_queue();

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = (va & ~PAGE_MASK);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xpq_queue_invlpg");
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t op;
	int ok;
	xpq_flush_queue();

	op.ptr = (paddr_t)ptr;
	op.val = val;
	if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
		return EFAULT;
	return (0);
}
#else /* XEN3 */
void
xpq_queue_pt_switch(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
	xpq_increment_idx();
}

void
xpq_queue_pin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_unpin_table(paddr_t pa)
{

	XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
	xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
	xpq_increment_idx();
}

void
xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
{

	XENPRINTK2(("xpq_queue_set_ldt\n"));
	KASSERT(va == (va & ~PAGE_MASK));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
	xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
	xpq_increment_idx();
}

void
xpq_queue_tlb_flush()
{

	XENPRINTK2(("xpq_queue_tlb_flush\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
	xpq_increment_idx();
}

void
xpq_flush_cache()
{
	int s = splvm();

	XENPRINTK2(("xpq_queue_flush_cache\n"));
	xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
	xpq_increment_idx();
	xpq_flush_queue();
	splx(s);
}

void
xpq_queue_invlpg(vaddr_t va)
{

	XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
	xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
	xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
	xpq_increment_idx();
}

int
xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
{
	mmu_update_t xpq_up[3];

	xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
	xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
	xpq_up[1].ptr = (paddr_t)ptr;
	xpq_up[1].val = val;
	if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
		return EFAULT;
	return (0);
}
#endif /* XEN3 */

#ifdef XENDEBUG
void
xpq_debug_dump()
{
	int i;

	XENPRINTK2(("idx: %d\n", xpq_idx));
	for (i = 0; i < xpq_idx; i++) {
		sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
		    (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		if (++i < xpq_idx)
			sprintf(XBUF + strlen(XBUF), "%x %08x ",
			    (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
		XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
	}
}
#endif


#ifdef __x86_64__
extern volatile struct xencons_interface *xencons_interface; /* XXX */
extern struct xenstore_domain_interface *xenstore_interface; /* XXX */

static void xen_bt_set_readonly (vaddr_t);
static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);

/* How many PDEs ? */
#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

/*
 * Construct and switch to new pagetables
 * first_avail is the first vaddr we can use after
 * we get rid of Xen pagetables
 */

vaddr_t xen_pmap_bootstrap (void);

/*
 * Function to get rid of Xen bootstrap tables
 */

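/*
 * This is done in two passes: a throwaway set of tables is first built
 * just past the area Xen handed us and we switch to it; only then are
 * the final tables rebuilt over the space the Xen-provided tables used
 * to occupy, so that space can be reclaimed.
 */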
vaddr_t
xen_pmap_bootstrap()
{
	int count, extracount, iocount = 0;
#ifdef __x86_64__
	int l2_4_count = 1;
#else
	int l2_4_count = 3;
	extern long end;
	extern long esym;
#endif
	vaddr_t bootstrap_tables, init_tables;

	xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
	init_tables = xen_start_info.pt_base;
	__PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));

	/* Space after Xen bootstrap tables should be free */
	bootstrap_tables = xen_start_info.pt_base +
	    (xen_start_info.nr_pt_frames * PAGE_SIZE);

	/* Calculate how many tables we need */
#ifdef __x86_64__
	count = TABLE_L2_ENTRIES;
#else
#if (NKSYMS || defined(DDB) || defined(LKM)) && !defined(SYMTAB_SPACE)
	if (esym)
		count = ((esym - KERNBASE) + PGOFSET) & ~PGOFSET;
	else
#endif
		count = ((end - KERNBASE) + PGOFSET) & ~PGOFSET;
	count = ((count + ~L2_FRAME) >> L2_SHIFT) + 1;
	count++;	/* one more ptp for VAs stolen by bootstrap */
	nkptp[1] = count;
#endif /* __x86_64__ */


#ifdef DOM0OPS
	if (xen_start_info.flags & SIF_INITDOMAIN) {
		/* space for ISA I/O mem */
		iocount = IOM_SIZE / PAGE_SIZE;
	}
#endif
	extracount = iocount + l2_4_count;

	/*
	 * Xen space we'll reclaim may not be enough for our new page tables,
	 * move bootstrap tables if necessary
	 */

	if (bootstrap_tables < init_tables + ((count + extracount) * PAGE_SIZE))
		bootstrap_tables = init_tables +
		    ((count + extracount) * PAGE_SIZE);

	/* Create temporary tables */
	xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
	    xen_start_info.nr_pt_frames, count, 0);

	/* get vaddr space for the shared info and the console pages */

	/* Create final tables */
	xen_bootstrap_tables(bootstrap_tables, init_tables,
	    count + l2_4_count, count, 1);

	return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
}


/*
 * Build a new table and switch to it
 * old_count is # of old tables (including PGD, PDTPE and PDE)
 * new_count is # of new tables (PTE only)
 * we assume areas don't overlap
 */


static void
xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
    int old_count, int new_count, int final)
{
	pd_entry_t *pdtpe, *pde, *pte;
	pd_entry_t *cur_pgd, *bt_pgd;
	paddr_t addr, page;
	vaddr_t avail, text_end, map_end;
	int i;
	extern char __data_start;

	__PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
	    old_pgd, new_pgd, old_count, new_count));
	text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
	/*
	 * size of R/W area after kernel text:
	 *  xencons_interface (if present)
	 *  xenstore_interface (if present)
	 *  table pages (new_count + 3 entries)
	 *  UAREA
	 *  dummy user PGD
	 * extra mappings (only when final is true):
	 *  HYPERVISOR_shared_info
	 *  ISA I/O mem (if needed)
	 */
	map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
	if (final) {
		HYPERVISOR_shared_info = (struct shared_info *)map_end;
		map_end += NBPG;
	}
#ifdef DOM0OPS
	if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
		/* ISA I/O mem */
		atdevbase = map_end;
		map_end += IOM_SIZE;
	}
#endif /* DOM0OPS */

	__PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
	    text_end, map_end));

	/*
	 * Create bootstrap page tables
	 * What we need:
	 * - a PGD (level 4)
	 * - a PDTPE (level 3)
	 * - a PDE (level2)
	 * - some PTEs (level 1)
	 */
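	/*
	 * Layout of the new area: the L4 sits at new_pgd, followed by one
	 * page for the L3, one for the L2, then new_count L1 pages, each
	 * mapping 2MB worth of kernel virtual address space.
	 */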

	cur_pgd = (pd_entry_t *) old_pgd;
	bt_pgd = (pd_entry_t *) new_pgd;
	memset (bt_pgd, 0, PAGE_SIZE);
	avail = new_pgd + PAGE_SIZE;

	/* Install level 3 */
	pdtpe = (pd_entry_t *) avail;
	memset (pdtpe, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pdtpe) - KERNBASE;
	bt_pgd[pl4_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;

	__PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
	    pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));

	/* Level 2 */
	pde = (pd_entry_t *) avail;
	memset(pde, 0, PAGE_SIZE);
	avail += PAGE_SIZE;

	addr = ((paddr_t) pde) - KERNBASE;
	pdtpe[pl3_pi(KERNTEXTOFF)] =
	    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
	__PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
	    pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));

	/* Level 1 */
	page = KERNTEXTOFF;
	for (i = 0; i < new_count; i ++) {
		paddr_t cur_page = page;

		pte = (pd_entry_t *) avail;
		avail += PAGE_SIZE;

		memset(pte, 0, PAGE_SIZE);
		while (pl2_pi(page) == pl2_pi (cur_page)) {
			if (page >= map_end) {
				/* not mapped at all */
				pte[pl1_pi(page)] = 0;
				page += PAGE_SIZE;
				continue;
			}
			pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
			if (page == (vaddr_t)HYPERVISOR_shared_info) {
				pte[pl1_pi(page)] = xen_start_info.shared_info;
				__PRINTK(("HYPERVISOR_shared_info "
				    "va 0x%lx pte 0x%lx\n",
				    HYPERVISOR_shared_info, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.console_mfn << PAGE_SHIFT)) {
				xencons_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.console_mfn << PAGE_SHIFT);
				__PRINTK(("xencons_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xencons_interface, pte[pl1_pi(page)]));
			}
			if (xpmap_ptom_masked(page - KERNBASE) ==
			    (xen_start_info.store_mfn << PAGE_SHIFT)) {
				xenstore_interface = (void *)page;
				pte[pl1_pi(page)] =
				    (xen_start_info.store_mfn << PAGE_SHIFT);
				__PRINTK(("xenstore_interface "
				    "va 0x%lx pte 0x%lx\n",
				    xenstore_interface, pte[pl1_pi(page)]));
			}
#ifdef DOM0OPS
			if (page >= (vaddr_t)atdevbase &&
			    page < (vaddr_t)atdevbase + IOM_SIZE) {
				pte[pl1_pi(page)] =
				    IOM_BEGIN + (page - (vaddr_t)atdevbase);
			}
#endif
			pte[pl1_pi(page)] |= PG_u | PG_V;
			if (page < text_end) {
				/* map kernel text RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= old_pgd
			    && page < old_pgd + (old_count * PAGE_SIZE)) {
				/* map old page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else if (page >= new_pgd &&
			    page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
				/* map new page tables RO */
				pte[pl1_pi(page)] |= 0;
			} else {
				/* map page RW */
				pte[pl1_pi(page)] |= PG_RW;
			}
			if (page == old_pgd)
				__PRINTK(("va 0x%lx pa 0x%lx "
				    "entry 0x%lx -> L1[0x%x]\n",
				    page, page - KERNBASE,
				    pte[pl1_pi(page)], pl1_pi(page)));
			page += PAGE_SIZE;
		}

		addr = ((paddr_t) pte) - KERNBASE;
		pde[pl2_pi(cur_page)] =
		    xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
		__PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
		    pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
		/* Mark readonly */
		xen_bt_set_readonly((vaddr_t) pte);
	}

	/* Install recursive page tables mapping */
	bt_pgd[PDIR_SLOT_PTE] =
	    xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
	    new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));

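	/*
	 * Xen will only let us pin (and later load) a page directory whose
	 * page-table pages are all mapped read-only, hence the RO remapping
	 * of the new tables before the pin below.
	 */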
	/* Mark tables RO */
	xen_bt_set_readonly((vaddr_t) pde);
	xen_bt_set_readonly((vaddr_t) pdtpe);
	xen_bt_set_readonly(new_pgd);
	/* Pin the PGD */
	__PRINTK(("pin PGD\n"));
	xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
	/* Switch to new tables */
	__PRINTK(("switch to PGD\n"));
	xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
	__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
	    bt_pgd[PDIR_SLOT_PTE]));
	__PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
	__PRINTK(("value 0x%lx\n", *L4_BASE));
	__PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));

	/* Now we can safely reclaim space taken by old tables */

	__PRINTK(("unpin old PGD\n"));
	/* Unpin old PGD */
	xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
	/* Mark old tables RW */
	page = old_pgd;
	addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
	addr = xpmap_mtop(addr);
	pte = (pd_entry_t *) (addr + KERNBASE);
	pte += pl1_pi(page);
	__PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
	    pde[pl2_pi(page)], addr, pte));
	while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
		addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
		xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
		page += PAGE_SIZE;
		/*
		 * Our ptes are contiguous
		 * so it's safe to just "++" here
		 */
		pte++;
	}
	xpq_flush_queue();
}


void
xen_set_user_pgd(paddr_t page)
{
	struct mmuext_op op;
	int s = splvm();

	xpq_flush_queue();
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
		panic("xen_set_user_pgd: failed to install new user page"
		    " directory %lx", page);
	splx(s);
}

/*
 * Bootstrap helper functions
 */

/*
 * Mark a page readonly
 * XXX: assuming vaddr = paddr + KERNBASE
 */

static void
xen_bt_set_readonly (vaddr_t page)
{
	pt_entry_t entry;

	entry = xpmap_ptom_masked(page - KERNBASE);
	entry |= PG_u | PG_V;

	HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
}
#endif /* __x86_64__ */