x86_xpmap.c revision 1.3.12.3 1 /* $NetBSD: x86_xpmap.c,v 1.3.12.3 2007/12/15 22:56:55 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * Copyright (c) 2006, 2007 Manuel Bouyer.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * This product includes software developed by Manuel Bouyer.
33 * 4. The name of the author may not be used to endorse or promote products
34 * derived from this software without specific prior written permission.
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
37 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
40 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
45 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 *
47 */
48
49 /*
50 *
51 * Copyright (c) 2004 Christian Limpach.
52 * All rights reserved.
53 *
54 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted provided that the following conditions
56 * are met:
57 * 1. Redistributions of source code must retain the above copyright
58 * notice, this list of conditions and the following disclaimer.
59 * 2. Redistributions in binary form must reproduce the above copyright
60 * notice, this list of conditions and the following disclaimer in the
61 * documentation and/or other materials provided with the distribution.
62 * 3. All advertising materials mentioning features or use of this software
63 * must display the following acknowledgement:
64 * This product includes software developed by Christian Limpach.
65 * 4. The name of the author may not be used to endorse or promote products
66 * derived from this software without specific prior written permission.
67 *
68 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
69 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
70 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
71 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
72 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
73 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
74 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
75 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
76 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
77 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
78 */
79
80
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3.12.3 2007/12/15 22:56:55 bouyer Exp $");
83
84 #include "opt_xen.h"
85 #include "opt_ddb.h"
86 #include "ksyms.h"
87
88 #include <sys/param.h>
89 #include <sys/systm.h>
90
91 #include <uvm/uvm.h>
92
93 #include <machine/pmap.h>
94 #include <machine/gdt.h>
95 #include <xen/xenfunc.h>
96
97 #include <dev/isa/isareg.h>
98 #include <machine/isa_machdep.h>
99
/*
 * Debug output control.  XENDEBUG is forced off here; the XENPRINT*
 * macros take a parenthesised printf-style argument list and expand to
 * nothing unless XENDEBUG is defined.
 */
#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

#if !defined(XENDEBUG)
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#else /* XENDEBUG */
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

/* Scratch buffer used by xpq_debug_dump() to format queue entries. */
static char XBUF[256];
#endif /* !XENDEBUG */

/* Unconditional variants: always forward to printf/printk. */
#define PRINTF(x) printf x
#define PRINTK(x) printk x
117
/*
 * PG_k is the protection bit or'ed into kernel mappings.
 * On x86_64 the kernel runs in ring 3, so kernel pages carry PG_u;
 * elsewhere no extra bit is needed.
 */
#if !defined(__x86_64__)
#define PG_k 0
#else
#define PG_k PG_u
#endif
124
125 volatile shared_info_t *HYPERVISOR_shared_info;
126 union start_info_union start_info_union;
127
128 void xen_failsafe_handler(void);
129
/*
 * mmu_update hypercall targeting our own domain.  The XEN3 interface
 * takes an explicit domain id (DOMID_SELF); the older one does not.
 */
#ifndef XEN3
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count))
#else /* XEN3 */
#define HYPERVISOR_mmu_update_self(req, count, success_count) \
	HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
#endif /* !XEN3 */
137
/*
 * Handler that must never run: it panics unconditionally.
 */
void
xen_failsafe_handler(void)
{
	panic("xen_failsafe_handler called!\n");
}
144
145
146 #ifndef __x86_64__
147 void
148 xen_update_descriptor(union descriptor *table, union descriptor *entry)
149 {
150 paddr_t pa;
151 pt_entry_t *ptp;
152
153 ptp = kvtopte((vaddr_t)table);
154 pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
155 if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
156 panic("HYPERVISOR_update_descriptor failed\n");
157 }
158 #endif
159
160 void
161 xen_set_ldt(vaddr_t base, uint32_t entries)
162 {
163 vaddr_t va;
164 vaddr_t end;
165 pt_entry_t *ptp, *maptp;
166 int s;
167
168 #ifdef __x86_64__
169 end = base + (entries << 3);
170 #else
171 end = base + entries * sizeof(union descriptor);
172 #endif
173
174 for (va = base; va < end; va += PAGE_SIZE) {
175 KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
176 ptp = kvtopte(va);
177 maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
178 XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
179 entries, ptp, maptp));
180 PTE_CLEARBITS(ptp, maptp, PG_RW);
181 }
182 s = splvm();
183 PTE_UPDATES_FLUSH();
184
185 xpq_queue_set_ldt(base, entries);
186 xpq_flush_queue();
187 splx(s);
188 }
189
190 #ifdef XENDEBUG
191 void xpq_debug_dump(void);
192 #endif
193
194 #define XPQUEUE_SIZE 2048
195 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
196 static int xpq_idx = 0;
197
198 void
199 xpq_flush_queue()
200 {
201 int i, ok;
202
203 XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
204 for (i = 0; i < xpq_idx; i++)
205 XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
206 (u_int)xpq_queue[i].val));
207 if (xpq_idx != 0 &&
208 HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
209 printf("xpq_flush_queue: %d entries \n", xpq_idx);
210 for (i = 0; i < xpq_idx; i++)
211 printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
212 (u_int64_t)xpq_queue[i].ptr,
213 (u_int64_t)xpq_queue[i].val);
214 panic("HYPERVISOR_mmu_update failed\n");
215 }
216 xpq_idx = 0;
217 }
218
219 static inline void
220 xpq_increment_idx(void)
221 {
222
223 xpq_idx++;
224 if (__predict_false(xpq_idx == XPQUEUE_SIZE))
225 xpq_flush_queue();
226 }
227
228 void
229 xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
230 {
231 XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
232 xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
233 xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
234 xpq_increment_idx();
235 #ifdef XENDEBUG_SYNC
236 xpq_flush_queue();
237 #endif
238 }
239
240 void
241 xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
242 {
243
244 KASSERT(((paddr_t)ptr & 3) == 0);
245 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
246 xpq_queue[xpq_idx].val = val;
247 xpq_increment_idx();
248 #ifdef XENDEBUG_SYNC
249 xpq_flush_queue();
250 #endif
251 }
252
253 void
254 xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
255 {
256
257 KASSERT(((paddr_t)ptr & 3) == 0);
258 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
259 xpq_queue[xpq_idx].val = val;
260 xpq_increment_idx();
261 #ifdef XENDEBUG_SYNC
262 xpq_flush_queue();
263 #endif
264 }
265
266 #ifdef XEN3
267 void
268 xpq_queue_pt_switch(paddr_t pa)
269 {
270 struct mmuext_op op;
271 xpq_flush_queue();
272
273 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
274 op.cmd = MMUEXT_NEW_BASEPTR;
275 op.arg1.mfn = pa >> PAGE_SHIFT;
276 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
277 panic("xpq_queue_pt_switch");
278 }
279
280 void
281 xpq_queue_pin_table(paddr_t pa)
282 {
283 struct mmuext_op op;
284 xpq_flush_queue();
285
286 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
287 op.arg1.mfn = pa >> PAGE_SHIFT;
288
289 #ifdef __x86_64__
290 op.cmd = MMUEXT_PIN_L4_TABLE;
291 #else
292 op.cmd = MMUEXT_PIN_L2_TABLE;
293 #endif
294 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
295 panic("xpq_queue_pin_table");
296 }
297
298 void
299 xpq_queue_unpin_table(paddr_t pa)
300 {
301 struct mmuext_op op;
302 xpq_flush_queue();
303
304 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
305 op.arg1.mfn = pa >> PAGE_SHIFT;
306 op.cmd = MMUEXT_UNPIN_TABLE;
307 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
308 panic("xpq_queue_unpin_table");
309 }
310
311 void
312 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
313 {
314 struct mmuext_op op;
315 xpq_flush_queue();
316
317 XENPRINTK2(("xpq_queue_set_ldt\n"));
318 KASSERT(va == (va & ~PAGE_MASK));
319 op.cmd = MMUEXT_SET_LDT;
320 op.arg1.linear_addr = va;
321 op.arg2.nr_ents = entries;
322 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
323 panic("xpq_queue_set_ldt");
324 }
325
326 void
327 xpq_queue_tlb_flush()
328 {
329 struct mmuext_op op;
330 xpq_flush_queue();
331
332 XENPRINTK2(("xpq_queue_tlb_flush\n"));
333 op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
334 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
335 panic("xpq_queue_tlb_flush");
336 }
337
338 void
339 xpq_flush_cache()
340 {
341 struct mmuext_op op;
342 int s = splvm();
343 xpq_flush_queue();
344
345 XENPRINTK2(("xpq_queue_flush_cache\n"));
346 op.cmd = MMUEXT_FLUSH_CACHE;
347 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
348 panic("xpq_flush_cache");
349 splx(s);
350 }
351
352 void
353 xpq_queue_invlpg(vaddr_t va)
354 {
355 struct mmuext_op op;
356 xpq_flush_queue();
357
358 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
359 op.cmd = MMUEXT_INVLPG_LOCAL;
360 op.arg1.linear_addr = (va & ~PAGE_MASK);
361 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
362 panic("xpq_queue_invlpg");
363 }
364
365 int
366 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
367 {
368 mmu_update_t op;
369 int ok;
370 xpq_flush_queue();
371
372 op.ptr = (paddr_t)ptr;
373 op.val = val;
374 if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
375 return EFAULT;
376 return (0);
377 }
378 #else /* XEN3 */
379 void
380 xpq_queue_pt_switch(paddr_t pa)
381 {
382
383 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
384 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
385 xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
386 xpq_increment_idx();
387 }
388
389 void
390 xpq_queue_pin_table(paddr_t pa)
391 {
392
393 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
394 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
395 xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
396 xpq_increment_idx();
397 }
398
399 void
400 xpq_queue_unpin_table(paddr_t pa)
401 {
402
403 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
404 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
405 xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
406 xpq_increment_idx();
407 }
408
409 void
410 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
411 {
412
413 XENPRINTK2(("xpq_queue_set_ldt\n"));
414 KASSERT(va == (va & ~PAGE_MASK));
415 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
416 xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
417 xpq_increment_idx();
418 }
419
420 void
421 xpq_queue_tlb_flush()
422 {
423
424 XENPRINTK2(("xpq_queue_tlb_flush\n"));
425 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
426 xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
427 xpq_increment_idx();
428 }
429
430 void
431 xpq_flush_cache()
432 {
433 int s = splvm();
434
435 XENPRINTK2(("xpq_queue_flush_cache\n"));
436 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
437 xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
438 xpq_increment_idx();
439 xpq_flush_queue();
440 splx(s);
441 }
442
443 void
444 xpq_queue_invlpg(vaddr_t va)
445 {
446
447 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
448 xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
449 xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
450 xpq_increment_idx();
451 }
452
453 int
454 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
455 {
456 mmu_update_t xpq_up[3];
457
458 xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
459 xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
460 xpq_up[1].ptr = (paddr_t)ptr;
461 xpq_up[1].val = val;
462 if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
463 return EFAULT;
464 return (0);
465 }
466 #endif /* XEN3 */
467
468 #ifdef XENDEBUG
469 void
470 xpq_debug_dump()
471 {
472 int i;
473
474 XENPRINTK2(("idx: %d\n", xpq_idx));
475 for (i = 0; i < xpq_idx; i++) {
476 sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
477 (u_int)xpq_queue[i].val);
478 if (++i < xpq_idx)
479 sprintf(XBUF + strlen(XBUF), "%x %08x ",
480 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
481 if (++i < xpq_idx)
482 sprintf(XBUF + strlen(XBUF), "%x %08x ",
483 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
484 if (++i < xpq_idx)
485 sprintf(XBUF + strlen(XBUF), "%x %08x ",
486 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
487 XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
488 }
489 }
490 #endif
491
492
493 extern volatile struct xencons_interface *xencons_interface; /* XXX */
494 extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
495
496 static void xen_bt_set_readonly (vaddr_t);
497 static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
498
499 /* How many PDEs ? */
500 #if L2_SLOT_KERNBASE > 0
501 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
502 #else
503 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
504 #endif
505
/*
 * Construct new page tables and switch to them.
 * first_avail (the value returned) is the first vaddr we can use
 * once we have got rid of the Xen-provided page tables.
 */
511
512 vaddr_t xen_pmap_bootstrap (void);
513
514 /*
515 * Function to get rid of Xen bootstrap tables
516 */
517
518 vaddr_t
519 xen_pmap_bootstrap()
520 {
521 int count, oldcount;
522 long mapsize;
523 const int l2_4_count = PTP_LEVELS - 1;
524 vaddr_t bootstrap_tables, init_tables;
525
526 xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
527 init_tables = xen_start_info.pt_base;
528 __PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
529
530 /* Space after Xen boostrap tables should be free */
531 bootstrap_tables = xen_start_info.pt_base +
532 (xen_start_info.nr_pt_frames * PAGE_SIZE);
533
534 /*
535 * Calculate how many space we need
536 * first everything mapped before the Xen bootstrap tables
537 */
538 mapsize = init_tables - KERNTEXTOFF;
539 /* after the tables we'll have:
540 * - UAREA
541 * - dummy user PGD (x86_64)
542 * - ISA MEM space
543 * - HYPERVISOR_shared_info
544 * - ISA I/O mem (if needed)
545 */
546 mapsize += UPAGES * NBPG;
547 #ifdef __x86_64__
548 mapsize += NBPG;
549 #endif
550 mapsize += NBPG;
551
552 #ifdef DOM0OPS
553 if (xen_start_info.flags & SIF_INITDOMAIN) {
554 /* space for ISA I/O mem */
555 mapsize += IOM_SIZE;
556 }
557 #endif
558 /* at this point mapsize doens't include the table size */
559
560 #ifdef __x86_64__
561 count = TABLE_L2_ENTRIES;
562 #else
563 count = (mapsize + (NBPD_L2 -1)) >> L2_SHIFT;
564 #endif /* __x86_64__ */
565
566 /* now compute how many L2 pages we need exactly */
567 printk("bootstrap_final mapsize 0x%lx count %d\n", mapsize, count);
568 while (mapsize + (count + l2_4_count) * PAGE_SIZE + KERNTEXTOFF >
569 ((long)count << L2_SHIFT) + KERNBASE) {
570 count++;
571 }
572 #ifndef __x86_64__
573 nkptp[1] = count;
574 #endif
575
576 /*
577 * install bootstrap pages. We may need more L2 pages than will
578 * have the final table here, as it's installed after the final table
579 */
580 oldcount = count;
581
582 bootstrap_again:
583 printk("bootstrap_again oldcount %d\n", oldcount);
584 /*
585 * Xen space we'll reclaim may not be enough for our new page tables,
586 * move bootstrap tables if necessary
587 */
588 if (bootstrap_tables < init_tables + ((count + l2_4_count) * PAGE_SIZE))
589 bootstrap_tables = init_tables +
590 ((count + l2_4_count) * PAGE_SIZE);
591 /* make sure we have enough to map the bootstrap_tables */
592 if (bootstrap_tables + ((oldcount + l2_4_count) * PAGE_SIZE) >
593 ((long)oldcount << L2_SHIFT) + KERNBASE) {
594 oldcount++;
595 goto bootstrap_again;
596 }
597
598 /* Create temporary tables */
599 xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
600 xen_start_info.nr_pt_frames, oldcount, 0);
601
602 /* Create final tables */
603 xen_bootstrap_tables(bootstrap_tables, init_tables,
604 oldcount + l2_4_count, count, 1);
605
606 /* zero out free space after tables */
607 memset((void *)(init_tables + ((count + l2_4_count) * PAGE_SIZE)), 0,
608 (UPAGES + 1) * NBPG);
609 return (init_tables + ((count + l2_4_count) * PAGE_SIZE));
610 }
611
612
/*
 * Build a new set of page tables and switch to it.
 * old_count is the number of old table pages (including PGD, PDTPE and PDE);
 * new_count is the number of new table pages (PTE pages only).
 * We assume the old and new areas don't overlap.
 */
619
620
621 static void
622 xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
623 int old_count, int new_count, int final)
624 {
625 pd_entry_t *pdtpe, *pde, *pte;
626 pd_entry_t *cur_pgd, *bt_pgd;
627 paddr_t addr, page;
628 vaddr_t avail, text_end, map_end;
629 int i;
630 extern char __data_start;
631
632 __PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
633 old_pgd, new_pgd, old_count, new_count));
634 text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
635 /*
636 * size of R/W area after kernel text:
637 * xencons_interface (if present)
638 * xenstore_interface (if present)
639 * table pages (new_count + (PTP_LEVELS - 1) entries)
640 * extra mappings (only when final is true):
641 * UAREA
642 * dummy user PGD (x86_64 only)/gdt page (i386 only)
643 * HYPERVISOR_shared_info
644 * ISA I/O mem (if needed)
645 */
646 map_end = new_pgd + ((new_count + PTP_LEVELS - 1) * NBPG);
647 if (final) {
648 map_end += (UPAGES + 1) * NBPG;
649 HYPERVISOR_shared_info = (struct shared_info *)map_end;
650 map_end += NBPG;
651 }
652 /*
653 * we always set atdevbase, as it's used by init386 to find the first
654 * available VA. map_end is updated only if we are dom0, so
655 * atdevbase -> atdevbase + IOM_SIZE will be mapped only in
656 * this case.
657 */
658 if (final)
659 atdevbase = map_end;
660 #ifdef DOM0OPS
661 if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
662 /* ISA I/O mem */
663 map_end += IOM_SIZE;
664 }
665 #endif /* DOM0OPS */
666
667 __PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
668 text_end, map_end));
669
670 /*
671 * Create bootstrap page tables
672 * What we need:
673 * - a PGD (level 4)
674 * - a PDTPE (level 3)
675 * - a PDE (level2)
676 * - some PTEs (level 1)
677 */
678
679 cur_pgd = (pd_entry_t *) old_pgd;
680 bt_pgd = (pd_entry_t *) new_pgd;
681 memset (bt_pgd, 0, PAGE_SIZE);
682 avail = new_pgd + PAGE_SIZE;
683 #if PTP_LEVELS > 3
684 /* Install level 3 */
685 pdtpe = (pd_entry_t *) avail;
686 memset (pdtpe, 0, PAGE_SIZE);
687 avail += PAGE_SIZE;
688
689 addr = ((paddr_t) pdtpe) - KERNBASE;
690 bt_pgd[pl4_pi(KERNTEXTOFF)] =
691 xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
692
693 __PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
694 pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
695 #else
696 pdtpe = bt_pgd;
697 #endif /* PTP_LEVELS > 3 */
698
699 #if PTP_LEVELS > 2
700 /* Level 2 */
701 pde = (pd_entry_t *) avail;
702 memset(pde, 0, PAGE_SIZE);
703 avail += PAGE_SIZE;
704
705 addr = ((paddr_t) pde) - KERNBASE;
706 pdtpe[pl3_pi(KERNTEXTOFF)] =
707 xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
708 __PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
709 pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
710 #else
711 pde = bt_pgd;
712 #endif /* PTP_LEVELS > 3 */
713
714 /* Level 1 */
715 page = KERNTEXTOFF;
716 for (i = 0; i < new_count; i ++) {
717 paddr_t cur_page = page;
718
719 pte = (pd_entry_t *) avail;
720 avail += PAGE_SIZE;
721
722 memset(pte, 0, PAGE_SIZE);
723 while (pl2_pi(page) == pl2_pi (cur_page)) {
724 if (page >= map_end) {
725 /* not mapped at all */
726 pte[pl1_pi(page)] = 0;
727 page += PAGE_SIZE;
728 continue;
729 }
730 pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
731 if (page == (vaddr_t)HYPERVISOR_shared_info) {
732 pte[pl1_pi(page)] = xen_start_info.shared_info;
733 __PRINTK(("HYPERVISOR_shared_info "
734 "va 0x%lx pte 0x%lx\n",
735 HYPERVISOR_shared_info, pte[pl1_pi(page)]));
736 }
737 if (xpmap_ptom_masked(page - KERNBASE) ==
738 (xen_start_info.console_mfn << PAGE_SHIFT)) {
739 xencons_interface = (void *)page;
740 pte[pl1_pi(page)] =
741 (xen_start_info.console_mfn << PAGE_SHIFT);
742 __PRINTK(("xencons_interface "
743 "va 0x%lx pte 0x%lx\n",
744 xencons_interface, pte[pl1_pi(page)]));
745 }
746 if (xpmap_ptom_masked(page - KERNBASE) ==
747 (xen_start_info.store_mfn << PAGE_SHIFT)) {
748 xenstore_interface = (void *)page;
749 pte[pl1_pi(page)] =
750 (xen_start_info.store_mfn << PAGE_SHIFT);
751 __PRINTK(("xenstore_interface "
752 "va 0x%lx pte 0x%lx\n",
753 xenstore_interface, pte[pl1_pi(page)]));
754 }
755 #ifdef DOM0OPS
756 if (page >= (vaddr_t)atdevbase &&
757 page < (vaddr_t)atdevbase + IOM_SIZE) {
758 pte[pl1_pi(page)] =
759 IOM_BEGIN + (page - (vaddr_t)atdevbase);
760 }
761 #endif
762 pte[pl1_pi(page)] |= PG_k | PG_V;
763 if (page < text_end) {
764 /* map kernel text RO */
765 pte[pl1_pi(page)] |= 0;
766 } else if (page >= old_pgd
767 && page < old_pgd + (old_count * PAGE_SIZE)) {
768 /* map old page tables RO */
769 pte[pl1_pi(page)] |= 0;
770 } else if (page >= new_pgd &&
771 page < new_pgd + ((new_count + PTP_LEVELS - 1) * PAGE_SIZE)) {
772 /* map new page tables RO */
773 pte[pl1_pi(page)] |= 0;
774 } else {
775 /* map page RW */
776 pte[pl1_pi(page)] |= PG_RW;
777 }
778 if (page == old_pgd || page >= new_pgd)
779 __PRINTK(("va 0x%lx pa 0x%lx "
780 "entry 0x%lx -> L1[0x%x]\n",
781 page, page - KERNBASE,
782 pte[pl1_pi(page)], pl1_pi(page)));
783 page += PAGE_SIZE;
784 }
785
786 addr = ((paddr_t) pte) - KERNBASE;
787 pde[pl2_pi(cur_page)] =
788 xpmap_ptom_masked(addr) | PG_k | PG_RW | PG_V;
789 __PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
790 pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
791 /* Mark readonly */
792 xen_bt_set_readonly((vaddr_t) pte);
793 }
794
795 /* Install recursive page tables mapping */
796 bt_pgd[PDIR_SLOT_PTE] =
797 xpmap_ptom_masked(new_pgd - KERNBASE) | PG_k | PG_V;
798 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
799 new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));
800
801 /* Mark tables RO */
802 xen_bt_set_readonly((vaddr_t) pde);
803 #if PTP_LEVELS > 2
804 xen_bt_set_readonly((vaddr_t) pdtpe);
805 #endif
806 #if PTP_LEVELS > 3
807 xen_bt_set_readonly(new_pgd);
808 #endif
809 /* Pin the PGD */
810 __PRINTK(("pin PDG\n"));
811 xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
812 #ifdef __i386__
813 /* Save phys. addr of PDP, for libkvm. */
814 PDPpaddr = new_pgd;
815 #endif
816 /* Switch to new tables */
817 __PRINTK(("switch to PDG\n"));
818 xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
819 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
820 bt_pgd[PDIR_SLOT_PTE]));
821
822 /* Now we can safely reclaim space taken by old tables */
823
824 __PRINTK(("unpin old PDG\n"));
825 /* Unpin old PGD */
826 xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
827 /* Mark old tables RW */
828 page = old_pgd;
829 addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
830 addr = xpmap_mtop(addr);
831 pte = (pd_entry_t *) (addr + KERNBASE);
832 pte += pl1_pi(page);
833 __PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
834 pde[pl2_pi(page)], addr, pte));
835 while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
836 addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
837 printk("addr 0x%lx pte 0x%lx *pte 0x%lx\n",
838 addr, pte, *pte);
839 xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
840 page += PAGE_SIZE;
841 /*
842 * Our ptes are contiguous
843 * so it's safe to just "++" here
844 */
845 pte++;
846 }
847 xpq_flush_queue();
848 }
849
850
851 /*
852 * Bootstrap helper functions
853 */
854
855 /*
856 * Mark a page readonly
857 * XXX: assuming vaddr = paddr + KERNBASE
858 */
859
860 static void
861 xen_bt_set_readonly (vaddr_t page)
862 {
863 pt_entry_t entry;
864
865 entry = xpmap_ptom_masked(page - KERNBASE);
866 entry |= PG_u | PG_V;
867
868 HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
869 }
870
871 #ifdef __x86_64__
872 void
873 xen_set_user_pgd(paddr_t page)
874 {
875 struct mmuext_op op;
876 int s = splvm();
877
878 xpq_flush_queue();
879 op.cmd = MMUEXT_NEW_USER_BASEPTR;
880 op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
881 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
882 panic("xen_set_user_pgd: failed to install new user page"
883 " directory %lx", page);
884 splx(s);
885 }
886 #endif /* __x86_64__ */
887