x86_xpmap.c revision 1.3.4.2 1 /* $NetBSD: x86_xpmap.c,v 1.3.4.2 2007/12/03 19:04:43 ad Exp $ */
2
3 /*
4 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * Copyright (c) 2006, 2007 Manuel Bouyer.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * This product includes software developed by Manuel Bouyer.
33 * 4. The name of the author may not be used to endorse or promote products
34 * derived from this software without specific prior written permission.
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
37 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
40 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
45 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 *
47 */
48
49 /*
50 *
51 * Copyright (c) 2004 Christian Limpach.
52 * All rights reserved.
53 *
54 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted provided that the following conditions
56 * are met:
57 * 1. Redistributions of source code must retain the above copyright
58 * notice, this list of conditions and the following disclaimer.
59 * 2. Redistributions in binary form must reproduce the above copyright
60 * notice, this list of conditions and the following disclaimer in the
61 * documentation and/or other materials provided with the distribution.
62 * 3. All advertising materials mentioning features or use of this software
63 * must display the following acknowledgement:
64 * This product includes software developed by Christian Limpach.
65 * 4. The name of the author may not be used to endorse or promote products
66 * derived from this software without specific prior written permission.
67 *
68 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
69 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
70 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
71 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
72 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
73 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
74 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
75 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
76 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
77 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
78 */
79
80
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3.4.2 2007/12/03 19:04:43 ad Exp $");
83
84 #include "opt_xen.h"
85
86 #include <sys/param.h>
87 #include <sys/systm.h>
88
89 #include <uvm/uvm.h>
90
91 #include <machine/pmap.h>
92 #include <machine/gdt.h>
93 #include <xen/xenfunc.h>
94
95 #include <dev/isa/isareg.h>
96 #include <machine/isa_machdep.h>
97
/*
 * Debug knobs.  XENDEBUG is forced off here; XENDEBUG_SYNC, when defined,
 * makes the xpq_queue_*_update() routines flush after every request.
 */
#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

/*
 * The tracing macros take one fully parenthesised argument list, e.g.
 * XENPRINTF(("fmt %d\n", v)), so that they can expand to nothing.
 */
#ifndef XENDEBUG
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#else
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

/* Scratch buffer used by xpq_debug_dump(). */
static char XBUF[256];
#endif

/* Unconditional variants of the above. */
#define PRINTF(x) printf x
#define PRINTK(x) printk x
115
116 volatile shared_info_t *HYPERVISOR_shared_info;
117 union start_info_union start_info_union;
118
119 void xen_failsafe_handler(void);
120
121 #ifdef XEN3
122 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
123 HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
124 #else
125 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
126 HYPERVISOR_mmu_update((req), (count), (success_count))
127 #endif
128
/*
 * Never expected to run: panics unconditionally.
 */
void
xen_failsafe_handler(void)
{
	panic("xen_failsafe_handler called!\n");
}
135
136
137 #ifndef __x86_64__
138 void
139 xen_update_descriptor(union descriptor *table, union descriptor *entry)
140 {
141 paddr_t pa;
142 pt_entry_t *ptp;
143
144 ptp = kvtopte((vaddr_t)table);
145 pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
146 if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
147 panic("HYPERVISOR_update_descriptor failed\n");
148 }
149 #endif
150
151 void
152 xen_set_ldt(vaddr_t base, uint32_t entries)
153 {
154 vaddr_t va;
155 vaddr_t end;
156 pt_entry_t *ptp, *maptp;
157 int s;
158
159 #ifdef __x86_64__
160 end = base + (entries << 3);
161 #else
162 end = base + entries * sizeof(union descriptor);
163 #endif
164
165 for (va = base; va < end; va += PAGE_SIZE) {
166 KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
167 ptp = kvtopte(va);
168 maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
169 XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
170 entries, ptp, maptp));
171 PTE_CLEARBITS(ptp, maptp, PG_RW);
172 }
173 s = splvm();
174 PTE_UPDATES_FLUSH();
175
176 xpq_queue_set_ldt(base, entries);
177 xpq_flush_queue();
178 splx(s);
179 }
180
181 #ifdef XENDEBUG
182 void xpq_debug_dump(void);
183 #endif
184
185 #define XPQUEUE_SIZE 2048
186 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
187 static int xpq_idx = 0;
188
189 void
190 xpq_flush_queue()
191 {
192 int i, ok;
193
194 XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
195 for (i = 0; i < xpq_idx; i++)
196 XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
197 (u_int)xpq_queue[i].val));
198 if (xpq_idx != 0 &&
199 HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
200 printf("xpq_flush_queue: %d entries \n", xpq_idx);
201 for (i = 0; i < xpq_idx; i++)
202 printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
203 (u_int64_t)xpq_queue[i].ptr,
204 (u_int64_t)xpq_queue[i].val);
205 panic("HYPERVISOR_mmu_update failed\n");
206 }
207 xpq_idx = 0;
208 }
209
210 static inline void
211 xpq_increment_idx(void)
212 {
213
214 xpq_idx++;
215 if (__predict_false(xpq_idx == XPQUEUE_SIZE))
216 xpq_flush_queue();
217 }
218
219 void
220 xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
221 {
222 XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
223 xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
224 xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
225 xpq_increment_idx();
226 #ifdef XENDEBUG_SYNC
227 xpq_flush_queue();
228 #endif
229 }
230
231 void
232 xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
233 {
234
235 KASSERT(((paddr_t)ptr & 3) == 0);
236 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
237 xpq_queue[xpq_idx].val = val;
238 xpq_increment_idx();
239 #ifdef XENDEBUG_SYNC
240 xpq_flush_queue();
241 #endif
242 }
243
244 void
245 xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
246 {
247
248 KASSERT(((paddr_t)ptr & 3) == 0);
249 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
250 xpq_queue[xpq_idx].val = val;
251 xpq_increment_idx();
252 #ifdef XENDEBUG_SYNC
253 xpq_flush_queue();
254 #endif
255 }
256
257 #ifdef XEN3
258 void
259 xpq_queue_pt_switch(paddr_t pa)
260 {
261 struct mmuext_op op;
262 xpq_flush_queue();
263
264 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
265 op.cmd = MMUEXT_NEW_BASEPTR;
266 op.arg1.mfn = pa >> PAGE_SHIFT;
267 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
268 panic("xpq_queue_pt_switch");
269 }
270
271 void
272 xpq_queue_pin_table(paddr_t pa)
273 {
274 struct mmuext_op op;
275 xpq_flush_queue();
276
277 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
278 op.arg1.mfn = pa >> PAGE_SHIFT;
279
280 #ifdef __x86_64__
281 op.cmd = MMUEXT_PIN_L4_TABLE;
282 #else
283 op.cmd = MMUEXT_PIN_L2_TABLE;
284 #endif
285 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
286 panic("xpq_queue_pin_table");
287 }
288
289 void
290 xpq_queue_unpin_table(paddr_t pa)
291 {
292 struct mmuext_op op;
293 xpq_flush_queue();
294
295 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
296 op.arg1.mfn = pa >> PAGE_SHIFT;
297 op.cmd = MMUEXT_UNPIN_TABLE;
298 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
299 panic("xpq_queue_unpin_table");
300 }
301
302 void
303 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
304 {
305 struct mmuext_op op;
306 xpq_flush_queue();
307
308 XENPRINTK2(("xpq_queue_set_ldt\n"));
309 KASSERT(va == (va & ~PAGE_MASK));
310 op.cmd = MMUEXT_SET_LDT;
311 op.arg1.linear_addr = va;
312 op.arg2.nr_ents = entries;
313 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
314 panic("xpq_queue_set_ldt");
315 }
316
317 void
318 xpq_queue_tlb_flush()
319 {
320 struct mmuext_op op;
321 xpq_flush_queue();
322
323 XENPRINTK2(("xpq_queue_tlb_flush\n"));
324 op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
325 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
326 panic("xpq_queue_tlb_flush");
327 }
328
329 void
330 xpq_flush_cache()
331 {
332 struct mmuext_op op;
333 int s = splvm();
334 xpq_flush_queue();
335
336 XENPRINTK2(("xpq_queue_flush_cache\n"));
337 op.cmd = MMUEXT_FLUSH_CACHE;
338 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
339 panic("xpq_flush_cache");
340 splx(s);
341 }
342
343 void
344 xpq_queue_invlpg(vaddr_t va)
345 {
346 struct mmuext_op op;
347 xpq_flush_queue();
348
349 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
350 op.cmd = MMUEXT_INVLPG_LOCAL;
351 op.arg1.linear_addr = (va & ~PAGE_MASK);
352 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
353 panic("xpq_queue_invlpg");
354 }
355
356 int
357 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
358 {
359 mmu_update_t op;
360 int ok;
361 xpq_flush_queue();
362
363 op.ptr = (paddr_t)ptr;
364 op.val = val;
365 if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
366 return EFAULT;
367 return (0);
368 }
369 #else /* XEN3 */
370 void
371 xpq_queue_pt_switch(paddr_t pa)
372 {
373
374 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
375 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
376 xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
377 xpq_increment_idx();
378 }
379
380 void
381 xpq_queue_pin_table(paddr_t pa)
382 {
383
384 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
385 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
386 xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
387 xpq_increment_idx();
388 }
389
390 void
391 xpq_queue_unpin_table(paddr_t pa)
392 {
393
394 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
395 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
396 xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
397 xpq_increment_idx();
398 }
399
400 void
401 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
402 {
403
404 XENPRINTK2(("xpq_queue_set_ldt\n"));
405 KASSERT(va == (va & ~PAGE_MASK));
406 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
407 xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
408 xpq_increment_idx();
409 }
410
411 void
412 xpq_queue_tlb_flush()
413 {
414
415 XENPRINTK2(("xpq_queue_tlb_flush\n"));
416 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
417 xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
418 xpq_increment_idx();
419 }
420
421 void
422 xpq_flush_cache()
423 {
424 int s = splvm();
425
426 XENPRINTK2(("xpq_queue_flush_cache\n"));
427 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
428 xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
429 xpq_increment_idx();
430 xpq_flush_queue();
431 splx(s);
432 }
433
434 void
435 xpq_queue_invlpg(vaddr_t va)
436 {
437
438 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
439 xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
440 xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
441 xpq_increment_idx();
442 }
443
444 int
445 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
446 {
447 mmu_update_t xpq_up[3];
448
449 xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
450 xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
451 xpq_up[1].ptr = (paddr_t)ptr;
452 xpq_up[1].val = val;
453 if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
454 return EFAULT;
455 return (0);
456 }
457 #endif /* XEN3 */
458
459 #ifdef XENDEBUG
460 void
461 xpq_debug_dump()
462 {
463 int i;
464
465 XENPRINTK2(("idx: %d\n", xpq_idx));
466 for (i = 0; i < xpq_idx; i++) {
467 sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
468 (u_int)xpq_queue[i].val);
469 if (++i < xpq_idx)
470 sprintf(XBUF + strlen(XBUF), "%x %08x ",
471 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
472 if (++i < xpq_idx)
473 sprintf(XBUF + strlen(XBUF), "%x %08x ",
474 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
475 if (++i < xpq_idx)
476 sprintf(XBUF + strlen(XBUF), "%x %08x ",
477 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
478 XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
479 }
480 }
481 #endif
482
483
484 #ifdef __x86_64__
485 extern volatile struct xencons_interface *xencons_interface; /* XXX */
486 extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
487
488 static void xen_bt_set_readonly (vaddr_t);
489 static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
490
491 /* How many PDEs ? */
492 #if L2_SLOT_KERNBASE > 0
493 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
494 #else
495 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
496 #endif
497
498 /*
499 * Construct and switch to new pagetables
500 * first_avail is the first vaddr we can use after
501 * we get rid of Xen pagetables
502 */
503
504 vaddr_t xen_pmap_bootstrap (void);
505
506 /*
507 * Function to get rid of Xen bootstrap tables
508 */
509
510 vaddr_t
511 xen_pmap_bootstrap()
512 {
513 int count, iocount = 0;
514 vaddr_t bootstrap_tables, init_tables;
515
516 xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
517 init_tables = xen_start_info.pt_base;
518 __PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
519
520 /* Space after Xen boostrap tables should be free */
521 bootstrap_tables = xen_start_info.pt_base +
522 (xen_start_info.nr_pt_frames * PAGE_SIZE);
523
524 /* Calculate how many tables we need */
525 count = TABLE_L2_ENTRIES;
526
527 #ifdef DOM0OPS
528 if (xen_start_info.flags & SIF_INITDOMAIN) {
529 /* space for ISA I/O mem */
530 iocount = IOM_SIZE / PAGE_SIZE;
531 }
532 #endif
533
534 /*
535 * Xen space we'll reclaim may not be enough for our new page tables,
536 * move bootstrap tables if necessary
537 */
538
539 if (bootstrap_tables < init_tables + ((count+3+iocount) * PAGE_SIZE))
540 bootstrap_tables = init_tables +
541 ((count+3+iocount) * PAGE_SIZE);
542
543 /* Create temporary tables */
544 xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
545 xen_start_info.nr_pt_frames, count, 0);
546
547 /* get vaddr space for the shared info and the console pages */
548
549 /* Create final tables */
550 xen_bootstrap_tables(bootstrap_tables, init_tables,
551 count + 3, count, 1);
552
553 return (init_tables + ((count + 3) * PAGE_SIZE));
554 }
555
556
557 /*
558 * Build a new table and switch to it
559 * old_count is # of old tables (including PGD, PDTPE and PDE)
560 * new_count is # of new tables (PTE only)
561 * we assume areas don't overlap
562 */
563
564
565 static void
566 xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
567 int old_count, int new_count, int final)
568 {
569 pd_entry_t *pdtpe, *pde, *pte;
570 pd_entry_t *cur_pgd, *bt_pgd;
571 paddr_t addr, page;
572 vaddr_t avail, text_end, map_end;
573 int i;
574 extern char __data_start;
575
576 __PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
577 old_pgd, new_pgd, old_count, new_count));
578 text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
579 /*
580 * size of R/W area after kernel text:
581 * xencons_interface (if present)
582 * xenstore_interface (if present)
583 * table pages (new_count + 3 entries)
584 * UAREA
585 * dummy user PGD
586 * extra mappings (only when final is true):
587 * HYPERVISOR_shared_info
588 * ISA I/O mem (if needed)
589 */
590 map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
591 if (final) {
592 HYPERVISOR_shared_info = (struct shared_info *)map_end;
593 map_end += NBPG;
594 }
595 #ifdef DOM0OPS
596 if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
597 /* ISA I/O mem */
598 atdevbase = map_end;
599 map_end += IOM_SIZE;
600 }
601 #endif /* DOM0OPS */
602
603 __PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
604 text_end, map_end));
605
606 /*
607 * Create bootstrap page tables
608 * What we need:
609 * - a PGD (level 4)
610 * - a PDTPE (level 3)
611 * - a PDE (level2)
612 * - some PTEs (level 1)
613 */
614
615 cur_pgd = (pd_entry_t *) old_pgd;
616 bt_pgd = (pd_entry_t *) new_pgd;
617 memset (bt_pgd, 0, PAGE_SIZE);
618 avail = new_pgd + PAGE_SIZE;
619
620 /* Install level 3 */
621 pdtpe = (pd_entry_t *) avail;
622 memset (pdtpe, 0, PAGE_SIZE);
623 avail += PAGE_SIZE;
624
625 addr = ((paddr_t) pdtpe) - KERNBASE;
626 bt_pgd[pl4_pi(KERNTEXTOFF)] =
627 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
628
629 __PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
630 pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
631
632 /* Level 2 */
633 pde = (pd_entry_t *) avail;
634 memset(pde, 0, PAGE_SIZE);
635 avail += PAGE_SIZE;
636
637 addr = ((paddr_t) pde) - KERNBASE;
638 pdtpe[pl3_pi(KERNTEXTOFF)] =
639 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
640 __PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
641 pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
642
643 /* Level 1 */
644 page = KERNTEXTOFF;
645 for (i = 0; i < new_count; i ++) {
646 paddr_t cur_page = page;
647
648 pte = (pd_entry_t *) avail;
649 avail += PAGE_SIZE;
650
651 memset(pte, 0, PAGE_SIZE);
652 while (pl2_pi(page) == pl2_pi (cur_page)) {
653 if (page >= map_end) {
654 /* not mapped at all */
655 pte[pl1_pi(page)] = 0;
656 page += PAGE_SIZE;
657 continue;
658 }
659 pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
660 if (page == (vaddr_t)HYPERVISOR_shared_info) {
661 pte[pl1_pi(page)] = xen_start_info.shared_info;
662 __PRINTK(("HYPERVISOR_shared_info "
663 "va 0x%lx pte 0x%lx\n",
664 HYPERVISOR_shared_info, pte[pl1_pi(page)]));
665 }
666 if (xpmap_ptom_masked(page - KERNBASE) ==
667 (xen_start_info.console_mfn << PAGE_SHIFT)) {
668 xencons_interface = (void *)page;
669 pte[pl1_pi(page)] =
670 (xen_start_info.console_mfn << PAGE_SHIFT);
671 __PRINTK(("xencons_interface "
672 va 0x%lx pte 0x%lx\n",
673 xencons_interface, pte[pl1_pi(page)]));
674 }
675 if (xpmap_ptom_masked(page - KERNBASE) ==
676 (xen_start_info.store_mfn << PAGE_SHIFT)) {
677 xenstore_interface = (void *)page;
678 pte[pl1_pi(page)] =
679 (xen_start_info.store_mfn << PAGE_SHIFT);
680 __PRINTK(("xenstore_interface "
681 "va 0x%lx pte 0x%lx\n",
682 xenstore_interface, pte[pl1_pi(page)]));
683 }
684 #ifdef DOM0OPS
685 if (page >= (vaddr_t)atdevbase &&
686 page < (vaddr_t)atdevbase + IOM_SIZE) {
687 pte[pl1_pi(page)] =
688 IOM_BEGIN + (page - (vaddr_t)atdevbase);
689 }
690 #endif
691 pte[pl1_pi(page)] |= PG_u | PG_V;
692 if (page < text_end) {
693 /* map kernel text RO */
694 pte[pl1_pi(page)] |= 0;
695 } else if (page >= old_pgd
696 && page < old_pgd + (old_count * PAGE_SIZE)) {
697 /* map old page tables RO */
698 pte[pl1_pi(page)] |= 0;
699 } else if (page >= new_pgd &&
700 page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
701 /* map new page tables RO */
702 pte[pl1_pi(page)] |= 0;
703 } else {
704 /* map page RW */
705 pte[pl1_pi(page)] |= PG_RW;
706 }
707 if (page == old_pgd)
708 __PRINTK(("va 0x%lx pa 0x%lx
709 "entry 0x%lx -> L1[0x%x]\n",
710 page, page - KERNBASE,
711 pte[pl1_pi(page)], pl1_pi(page)));
712 page += PAGE_SIZE;
713 }
714
715 addr = ((paddr_t) pte) - KERNBASE;
716 pde[pl2_pi(cur_page)] =
717 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
718 __PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
719 pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
720 /* Mark readonly */
721 xen_bt_set_readonly((vaddr_t) pte);
722 }
723
724 /* Install recursive page tables mapping */
725 bt_pgd[PDIR_SLOT_PTE] =
726 xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
727 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
728 new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));
729
730 /* Mark tables RO */
731 xen_bt_set_readonly((vaddr_t) pde);
732 xen_bt_set_readonly((vaddr_t) pdtpe);
733 xen_bt_set_readonly(new_pgd);
734 /* Pin the PGD */
735 __PRINTK(("pin PDG\n"));
736 xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
737 /* Switch to new tables */
738 __PRINTK(("switch to PDG\n"));
739 xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
740 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
741 bt_pgd[PDIR_SLOT_PTE]));
742 __PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
743 __PRINTK(("value 0x%lx\n", *L4_BASE));
744 __PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));
745
746 /* Now we can safely reclaim space taken by old tables */
747
748 __PRINTK(("unpin old PDG\n"));
749 /* Unpin old PGD */
750 xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
751 /* Mark old tables RW */
752 page = old_pgd;
753 addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
754 addr = xpmap_mtop(addr);
755 pte = (pd_entry_t *) (addr + KERNBASE);
756 pte += pl1_pi(page);
757 __PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
758 pde[pl2_pi(page)], addr, pte));
759 while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
760 addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
761 xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
762 page += PAGE_SIZE;
763 /*
764 * Our ptes are contiguous
765 * so it's safe to just "++" here
766 */
767 pte++;
768 }
769 xpq_flush_queue();
770 }
771
772
773 void
774 xen_set_user_pgd(paddr_t page)
775 {
776 struct mmuext_op op;
777 int s = splvm();
778
779 xpq_flush_queue();
780 op.cmd = MMUEXT_NEW_USER_BASEPTR;
781 op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
782 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
783 panic("xen_set_user_pgd: failed to install new user page"
784 " directory %lx", page);
785 splx(s);
786 }
787
788 /*
789 * Bootstrap helper functions
790 */
791
792 /*
793 * Mark a page readonly
794 * XXX: assuming vaddr = paddr + KERNBASE
795 */
796
797 static void
798 xen_bt_set_readonly (vaddr_t page)
799 {
800 pt_entry_t entry;
801
802 entry = xpmap_ptom_masked(page - KERNBASE);
803 entry |= PG_u | PG_V;
804
805 HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
806 }
807 #endif /* x86_64 */
808 /* $NetBSD: x86_xpmap.c,v 1.3.4.2 2007/12/03 19:04:43 ad Exp $ */
809
810 /*
811 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr>
812 *
813 * Permission to use, copy, modify, and distribute this software for any
814 * purpose with or without fee is hereby granted, provided that the above
815 * copyright notice and this permission notice appear in all copies.
816 *
817 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
818 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
819 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
820 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
821 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
822 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
823 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
824 */
825
826 /*
827 * Copyright (c) 2006, 2007 Manuel Bouyer.
828 *
829 * Redistribution and use in source and binary forms, with or without
830 * modification, are permitted provided that the following conditions
831 * are met:
832 * 1. Redistributions of source code must retain the above copyright
833 * notice, this list of conditions and the following disclaimer.
834 * 2. Redistributions in binary form must reproduce the above copyright
835 * notice, this list of conditions and the following disclaimer in the
836 * documentation and/or other materials provided with the distribution.
837 * 3. All advertising materials mentioning features or use of this software
838 * must display the following acknowledgement:
839 * This product includes software developed by Manuel Bouyer.
840 * 4. The name of the author may not be used to endorse or promote products
841 * derived from this software without specific prior written permission.
842 *
843 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
844 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
845 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
846 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
847 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
848 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
849 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
850 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
851 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
852 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
853 *
854 */
855
856 /*
857 *
858 * Copyright (c) 2004 Christian Limpach.
859 * All rights reserved.
860 *
861 * Redistribution and use in source and binary forms, with or without
862 * modification, are permitted provided that the following conditions
863 * are met:
864 * 1. Redistributions of source code must retain the above copyright
865 * notice, this list of conditions and the following disclaimer.
866 * 2. Redistributions in binary form must reproduce the above copyright
867 * notice, this list of conditions and the following disclaimer in the
868 * documentation and/or other materials provided with the distribution.
869 * 3. All advertising materials mentioning features or use of this software
870 * must display the following acknowledgement:
871 * This product includes software developed by Christian Limpach.
872 * 4. The name of the author may not be used to endorse or promote products
873 * derived from this software without specific prior written permission.
874 *
875 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
876 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
877 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
878 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
879 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
880 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
881 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
882 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
883 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
884 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
885 */
886
887
888 #include <sys/cdefs.h>
889 __KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.3.4.2 2007/12/03 19:04:43 ad Exp $");
890
891 #include "opt_xen.h"
892
893 #include <sys/param.h>
894 #include <sys/systm.h>
895
896 #include <uvm/uvm.h>
897
898 #include <machine/pmap.h>
899 #include <machine/gdt.h>
900 #include <xen/xenfunc.h>
901
902 #include <dev/isa/isareg.h>
903 #include <machine/isa_machdep.h>
904
/*
 * Debug knobs.  XENDEBUG is forced off here; XENDEBUG_SYNC, when defined,
 * makes the xpq_queue_*_update() routines flush after every request.
 */
#undef XENDEBUG
/* #define XENDEBUG_SYNC */
/* #define XENDEBUG_LOW */

/*
 * The tracing macros take one fully parenthesised argument list, e.g.
 * XENPRINTF(("fmt %d\n", v)), so that they can expand to nothing.
 */
#ifndef XENDEBUG
#define XENPRINTF(x)
#define XENPRINTK(x)
#define XENPRINTK2(x)
#else
#define XENPRINTF(x) printf x
#define XENPRINTK(x) printk x
#define XENPRINTK2(x) /* printk x */

/* Scratch buffer for debug formatting. */
static char XBUF[256];
#endif

/* Unconditional variants of the above. */
#define PRINTF(x) printf x
#define PRINTK(x) printk x
922
923 volatile shared_info_t *HYPERVISOR_shared_info;
924 union start_info_union start_info_union;
925
926 void xen_failsafe_handler(void);
927
928 #ifdef XEN3
929 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
930 HYPERVISOR_mmu_update((req), (count), (success_count), DOMID_SELF)
931 #else
932 #define HYPERVISOR_mmu_update_self(req, count, success_count) \
933 HYPERVISOR_mmu_update((req), (count), (success_count))
934 #endif
935
/*
 * Never expected to run: panics unconditionally.
 */
void
xen_failsafe_handler(void)
{
	panic("xen_failsafe_handler called!\n");
}
942
943
944 #ifndef __x86_64__
945 void
946 xen_update_descriptor(union descriptor *table, union descriptor *entry)
947 {
948 paddr_t pa;
949 pt_entry_t *ptp;
950
951 ptp = kvtopte((vaddr_t)table);
952 pa = (*ptp & PG_FRAME) | ((vaddr_t)table & ~PG_FRAME);
953 if (HYPERVISOR_update_descriptor(pa, entry->raw[0], entry->raw[1]))
954 panic("HYPERVISOR_update_descriptor failed\n");
955 }
956 #endif
957
958 void
959 xen_set_ldt(vaddr_t base, uint32_t entries)
960 {
961 vaddr_t va;
962 vaddr_t end;
963 pt_entry_t *ptp, *maptp;
964 int s;
965
966 #ifdef __x86_64__
967 end = base + (entries << 3);
968 #else
969 end = base + entries * sizeof(union descriptor);
970 #endif
971
972 for (va = base; va < end; va += PAGE_SIZE) {
973 KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
974 ptp = kvtopte(va);
975 maptp = (pt_entry_t *)vtomach((vaddr_t)ptp);
976 XENPRINTF(("xen_set_ldt %p %d %p %p\n", (void *)base,
977 entries, ptp, maptp));
978 PTE_CLEARBITS(ptp, maptp, PG_RW);
979 }
980 s = splvm();
981 PTE_UPDATES_FLUSH();
982
983 xpq_queue_set_ldt(base, entries);
984 xpq_flush_queue();
985 splx(s);
986 }
987
988 #ifdef XENDEBUG
989 void xpq_debug_dump(void);
990 #endif
991
992 #define XPQUEUE_SIZE 2048
993 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
994 static int xpq_idx = 0;
995
996 void
997 xpq_flush_queue()
998 {
999 int i, ok;
1000
1001 XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
1002 for (i = 0; i < xpq_idx; i++)
1003 XENPRINTK2(("%d: %p %08x\n", i, (u_int)xpq_queue[i].ptr,
1004 (u_int)xpq_queue[i].val));
1005 if (xpq_idx != 0 &&
1006 HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok) < 0) {
1007 printf("xpq_flush_queue: %d entries \n", xpq_idx);
1008 for (i = 0; i < xpq_idx; i++)
1009 printf("0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
1010 (u_int64_t)xpq_queue[i].ptr,
1011 (u_int64_t)xpq_queue[i].val);
1012 panic("HYPERVISOR_mmu_update failed\n");
1013 }
1014 xpq_idx = 0;
1015 }
1016
1017 static inline void
1018 xpq_increment_idx(void)
1019 {
1020
1021 xpq_idx++;
1022 if (__predict_false(xpq_idx == XPQUEUE_SIZE))
1023 xpq_flush_queue();
1024 }
1025
1026 void
1027 xpq_queue_machphys_update(paddr_t ma, paddr_t pa)
1028 {
1029 XENPRINTK2(("xpq_queue_machphys_update ma=%p pa=%p\n", (void *)ma, (void *)pa));
1030 xpq_queue[xpq_idx].ptr = ma | MMU_MACHPHYS_UPDATE;
1031 xpq_queue[xpq_idx].val = (pa - XPMAP_OFFSET) >> PAGE_SHIFT;
1032 xpq_increment_idx();
1033 #ifdef XENDEBUG_SYNC
1034 xpq_flush_queue();
1035 #endif
1036 }
1037
1038 void
1039 xpq_queue_pde_update(pd_entry_t *ptr, pd_entry_t val)
1040 {
1041
1042 KASSERT(((paddr_t)ptr & 3) == 0);
1043 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
1044 xpq_queue[xpq_idx].val = val;
1045 xpq_increment_idx();
1046 #ifdef XENDEBUG_SYNC
1047 xpq_flush_queue();
1048 #endif
1049 }
1050
1051 void
1052 xpq_queue_pte_update(pt_entry_t *ptr, pt_entry_t val)
1053 {
1054
1055 KASSERT(((paddr_t)ptr & 3) == 0);
1056 xpq_queue[xpq_idx].ptr = (paddr_t)ptr | MMU_NORMAL_PT_UPDATE;
1057 xpq_queue[xpq_idx].val = val;
1058 xpq_increment_idx();
1059 #ifdef XENDEBUG_SYNC
1060 xpq_flush_queue();
1061 #endif
1062 }
1063
1064 #ifdef XEN3
1065 void
1066 xpq_queue_pt_switch(paddr_t pa)
1067 {
1068 struct mmuext_op op;
1069 xpq_flush_queue();
1070
1071 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
1072 op.cmd = MMUEXT_NEW_BASEPTR;
1073 op.arg1.mfn = pa >> PAGE_SHIFT;
1074 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1075 panic("xpq_queue_pt_switch");
1076 }
1077
1078 void
1079 xpq_queue_pin_table(paddr_t pa)
1080 {
1081 struct mmuext_op op;
1082 xpq_flush_queue();
1083
1084 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
1085 op.arg1.mfn = pa >> PAGE_SHIFT;
1086
1087 #ifdef __x86_64__
1088 op.cmd = MMUEXT_PIN_L4_TABLE;
1089 #else
1090 op.cmd = MMUEXT_PIN_L2_TABLE;
1091 #endif
1092 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1093 panic("xpq_queue_pin_table");
1094 }
1095
1096 void
1097 xpq_queue_unpin_table(paddr_t pa)
1098 {
1099 struct mmuext_op op;
1100 xpq_flush_queue();
1101
1102 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
1103 op.arg1.mfn = pa >> PAGE_SHIFT;
1104 op.cmd = MMUEXT_UNPIN_TABLE;
1105 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1106 panic("xpq_queue_unpin_table");
1107 }
1108
1109 void
1110 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
1111 {
1112 struct mmuext_op op;
1113 xpq_flush_queue();
1114
1115 XENPRINTK2(("xpq_queue_set_ldt\n"));
1116 KASSERT(va == (va & ~PAGE_MASK));
1117 op.cmd = MMUEXT_SET_LDT;
1118 op.arg1.linear_addr = va;
1119 op.arg2.nr_ents = entries;
1120 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1121 panic("xpq_queue_set_ldt");
1122 }
1123
1124 void
1125 xpq_queue_tlb_flush()
1126 {
1127 struct mmuext_op op;
1128 xpq_flush_queue();
1129
1130 XENPRINTK2(("xpq_queue_tlb_flush\n"));
1131 op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
1132 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1133 panic("xpq_queue_tlb_flush");
1134 }
1135
1136 void
1137 xpq_flush_cache()
1138 {
1139 struct mmuext_op op;
1140 int s = splvm();
1141 xpq_flush_queue();
1142
1143 XENPRINTK2(("xpq_queue_flush_cache\n"));
1144 op.cmd = MMUEXT_FLUSH_CACHE;
1145 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1146 panic("xpq_flush_cache");
1147 splx(s);
1148 }
1149
1150 void
1151 xpq_queue_invlpg(vaddr_t va)
1152 {
1153 struct mmuext_op op;
1154 xpq_flush_queue();
1155
1156 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
1157 op.cmd = MMUEXT_INVLPG_LOCAL;
1158 op.arg1.linear_addr = (va & ~PAGE_MASK);
1159 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1160 panic("xpq_queue_invlpg");
1161 }
1162
1163 int
1164 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
1165 {
1166 mmu_update_t op;
1167 int ok;
1168 xpq_flush_queue();
1169
1170 op.ptr = (paddr_t)ptr;
1171 op.val = val;
1172 if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0)
1173 return EFAULT;
1174 return (0);
1175 }
1176 #else /* XEN3 */
1177 void
1178 xpq_queue_pt_switch(paddr_t pa)
1179 {
1180
1181 XENPRINTK2(("xpq_queue_pt_switch: %p %p\n", (void *)pa, (void *)pa));
1182 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
1183 xpq_queue[xpq_idx].val = MMUEXT_NEW_BASEPTR;
1184 xpq_increment_idx();
1185 }
1186
1187 void
1188 xpq_queue_pin_table(paddr_t pa)
1189 {
1190
1191 XENPRINTK2(("xpq_queue_pin_table: %p %p\n", (void *)pa, (void *)pa));
1192 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
1193 xpq_queue[xpq_idx].val = MMUEXT_PIN_L2_TABLE;
1194 xpq_increment_idx();
1195 }
1196
1197 void
1198 xpq_queue_unpin_table(paddr_t pa)
1199 {
1200
1201 XENPRINTK2(("xpq_queue_unpin_table: %p %p\n", (void *)pa, (void *)pa));
1202 xpq_queue[xpq_idx].ptr = pa | MMU_EXTENDED_COMMAND;
1203 xpq_queue[xpq_idx].val = MMUEXT_UNPIN_TABLE;
1204 xpq_increment_idx();
1205 }
1206
1207 void
1208 xpq_queue_set_ldt(vaddr_t va, uint32_t entries)
1209 {
1210
1211 XENPRINTK2(("xpq_queue_set_ldt\n"));
1212 KASSERT(va == (va & ~PAGE_MASK));
1213 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND | va;
1214 xpq_queue[xpq_idx].val = MMUEXT_SET_LDT | (entries << MMUEXT_CMD_SHIFT);
1215 xpq_increment_idx();
1216 }
1217
1218 void
1219 xpq_queue_tlb_flush()
1220 {
1221
1222 XENPRINTK2(("xpq_queue_tlb_flush\n"));
1223 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
1224 xpq_queue[xpq_idx].val = MMUEXT_TLB_FLUSH;
1225 xpq_increment_idx();
1226 }
1227
1228 void
1229 xpq_flush_cache()
1230 {
1231 int s = splvm();
1232
1233 XENPRINTK2(("xpq_queue_flush_cache\n"));
1234 xpq_queue[xpq_idx].ptr = MMU_EXTENDED_COMMAND;
1235 xpq_queue[xpq_idx].val = MMUEXT_FLUSH_CACHE;
1236 xpq_increment_idx();
1237 xpq_flush_queue();
1238 splx(s);
1239 }
1240
1241 void
1242 xpq_queue_invlpg(vaddr_t va)
1243 {
1244
1245 XENPRINTK2(("xpq_queue_invlpg %p\n", (void *)va));
1246 xpq_queue[xpq_idx].ptr = (va & ~PAGE_MASK) | MMU_EXTENDED_COMMAND;
1247 xpq_queue[xpq_idx].val = MMUEXT_INVLPG;
1248 xpq_increment_idx();
1249 }
1250
1251 int
1252 xpq_update_foreign(pt_entry_t *ptr, pt_entry_t val, int dom)
1253 {
1254 mmu_update_t xpq_up[3];
1255
1256 xpq_up[0].ptr = MMU_EXTENDED_COMMAND;
1257 xpq_up[0].val = MMUEXT_SET_FOREIGNDOM | (dom << 16);
1258 xpq_up[1].ptr = (paddr_t)ptr;
1259 xpq_up[1].val = val;
1260 if (HYPERVISOR_mmu_update_self(xpq_up, 2, NULL) < 0)
1261 return EFAULT;
1262 return (0);
1263 }
1264 #endif /* XEN3 */
1265
1266 #ifdef XENDEBUG
1267 void
1268 xpq_debug_dump()
1269 {
1270 int i;
1271
1272 XENPRINTK2(("idx: %d\n", xpq_idx));
1273 for (i = 0; i < xpq_idx; i++) {
1274 sprintf(XBUF, "%x %08x ", (u_int)xpq_queue[i].ptr,
1275 (u_int)xpq_queue[i].val);
1276 if (++i < xpq_idx)
1277 sprintf(XBUF + strlen(XBUF), "%x %08x ",
1278 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
1279 if (++i < xpq_idx)
1280 sprintf(XBUF + strlen(XBUF), "%x %08x ",
1281 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
1282 if (++i < xpq_idx)
1283 sprintf(XBUF + strlen(XBUF), "%x %08x ",
1284 (u_int)xpq_queue[i].ptr, (u_int)xpq_queue[i].val);
1285 XENPRINTK2(("%d: %s\n", xpq_idx, XBUF));
1286 }
1287 }
1288 #endif
1289
1290
1291 #ifdef __x86_64__
1292 extern volatile struct xencons_interface *xencons_interface; /* XXX */
1293 extern struct xenstore_domain_interface *xenstore_interface; /* XXX */
1294
1295 static void xen_bt_set_readonly (vaddr_t);
1296 static void xen_bootstrap_tables (vaddr_t, vaddr_t, int, int, int);
1297
1298 /* How many PDEs ? */
1299 #if L2_SLOT_KERNBASE > 0
1300 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
1301 #else
1302 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
1303 #endif
1304
/*
 * Construct the new page tables and switch to them.
 * The value returned is the first vaddr that can be used once
 * the Xen-provided page tables have been discarded.
 */
1310
1311 vaddr_t xen_pmap_bootstrap (void);
1312
1313 /*
1314 * Function to get rid of Xen bootstrap tables
1315 */
1316
1317 vaddr_t
1318 xen_pmap_bootstrap()
1319 {
1320 int count, iocount = 0;
1321 vaddr_t bootstrap_tables, init_tables;
1322
1323 xpmap_phys_to_machine_mapping = (paddr_t *) xen_start_info.mfn_list;
1324 init_tables = xen_start_info.pt_base;
1325 __PRINTK(("xen_arch_pmap_bootstrap init_tables=0x%lx\n", init_tables));
1326
1327 /* Space after Xen boostrap tables should be free */
1328 bootstrap_tables = xen_start_info.pt_base +
1329 (xen_start_info.nr_pt_frames * PAGE_SIZE);
1330
1331 /* Calculate how many tables we need */
1332 count = TABLE_L2_ENTRIES;
1333
1334 #ifdef DOM0OPS
1335 if (xen_start_info.flags & SIF_INITDOMAIN) {
1336 /* space for ISA I/O mem */
1337 iocount = IOM_SIZE / PAGE_SIZE;
1338 }
1339 #endif
1340
1341 /*
1342 * Xen space we'll reclaim may not be enough for our new page tables,
1343 * move bootstrap tables if necessary
1344 */
1345
1346 if (bootstrap_tables < init_tables + ((count+3+iocount) * PAGE_SIZE))
1347 bootstrap_tables = init_tables +
1348 ((count+3+iocount) * PAGE_SIZE);
1349
1350 /* Create temporary tables */
1351 xen_bootstrap_tables(xen_start_info.pt_base, bootstrap_tables,
1352 xen_start_info.nr_pt_frames, count, 0);
1353
1354 /* get vaddr space for the shared info and the console pages */
1355
1356 /* Create final tables */
1357 xen_bootstrap_tables(bootstrap_tables, init_tables,
1358 count + 3, count, 1);
1359
1360 return (init_tables + ((count + 3) * PAGE_SIZE));
1361 }
1362
1363
1364 /*
1365 * Build a new table and switch to it
1366 * old_count is # of old tables (including PGD, PDTPE and PDE)
1367 * new_count is # of new tables (PTE only)
1368 * we assume areas don't overlap
1369 */
1370
1371
1372 static void
1373 xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
1374 int old_count, int new_count, int final)
1375 {
1376 pd_entry_t *pdtpe, *pde, *pte;
1377 pd_entry_t *cur_pgd, *bt_pgd;
1378 paddr_t addr, page;
1379 vaddr_t avail, text_end, map_end;
1380 int i;
1381 extern char __data_start;
1382
1383 __PRINTK(("xen_bootstrap_tables(0x%lx, 0x%lx, %d, %d)\n",
1384 old_pgd, new_pgd, old_count, new_count));
1385 text_end = ((vaddr_t)&__data_start) & ~PAGE_MASK;
1386 /*
1387 * size of R/W area after kernel text:
1388 * xencons_interface (if present)
1389 * xenstore_interface (if present)
1390 * table pages (new_count + 3 entries)
1391 * UAREA
1392 * dummy user PGD
1393 * extra mappings (only when final is true):
1394 * HYPERVISOR_shared_info
1395 * ISA I/O mem (if needed)
1396 */
1397 map_end = new_pgd + ((new_count + 3 + UPAGES + 1) * NBPG);
1398 if (final) {
1399 HYPERVISOR_shared_info = (struct shared_info *)map_end;
1400 map_end += NBPG;
1401 }
1402 #ifdef DOM0OPS
1403 if (final && (xen_start_info.flags & SIF_INITDOMAIN)) {
1404 /* ISA I/O mem */
1405 atdevbase = map_end;
1406 map_end += IOM_SIZE;
1407 }
1408 #endif /* DOM0OPS */
1409
1410 __PRINTK(("xen_bootstrap_tables text_end 0x%lx map_end 0x%lx\n",
1411 text_end, map_end));
1412
1413 /*
1414 * Create bootstrap page tables
1415 * What we need:
1416 * - a PGD (level 4)
1417 * - a PDTPE (level 3)
1418 * - a PDE (level2)
1419 * - some PTEs (level 1)
1420 */
1421
1422 cur_pgd = (pd_entry_t *) old_pgd;
1423 bt_pgd = (pd_entry_t *) new_pgd;
1424 memset (bt_pgd, 0, PAGE_SIZE);
1425 avail = new_pgd + PAGE_SIZE;
1426
1427 /* Install level 3 */
1428 pdtpe = (pd_entry_t *) avail;
1429 memset (pdtpe, 0, PAGE_SIZE);
1430 avail += PAGE_SIZE;
1431
1432 addr = ((paddr_t) pdtpe) - KERNBASE;
1433 bt_pgd[pl4_pi(KERNTEXTOFF)] =
1434 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
1435
1436 __PRINTK(("L3 va 0x%lx pa 0x%lx entry 0x%lx -> L4[0x%x]\n",
1437 pdtpe, addr, bt_pgd[pl4_pi(KERNTEXTOFF)], pl4_pi(KERNTEXTOFF)));
1438
1439 /* Level 2 */
1440 pde = (pd_entry_t *) avail;
1441 memset(pde, 0, PAGE_SIZE);
1442 avail += PAGE_SIZE;
1443
1444 addr = ((paddr_t) pde) - KERNBASE;
1445 pdtpe[pl3_pi(KERNTEXTOFF)] =
1446 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
1447 __PRINTK(("L2 va 0x%lx pa 0x%lx entry 0x%lx -> L3[0x%x]\n",
1448 pde, addr, pdtpe[pl3_pi(KERNTEXTOFF)], pl3_pi(KERNTEXTOFF)));
1449
1450 /* Level 1 */
1451 page = KERNTEXTOFF;
1452 for (i = 0; i < new_count; i ++) {
1453 paddr_t cur_page = page;
1454
1455 pte = (pd_entry_t *) avail;
1456 avail += PAGE_SIZE;
1457
1458 memset(pte, 0, PAGE_SIZE);
1459 while (pl2_pi(page) == pl2_pi (cur_page)) {
1460 if (page >= map_end) {
1461 /* not mapped at all */
1462 pte[pl1_pi(page)] = 0;
1463 page += PAGE_SIZE;
1464 continue;
1465 }
1466 pte[pl1_pi(page)] = xpmap_ptom_masked(page - KERNBASE);
1467 if (page == (vaddr_t)HYPERVISOR_shared_info) {
1468 pte[pl1_pi(page)] = xen_start_info.shared_info;
1469 __PRINTK(("HYPERVISOR_shared_info "
1470 "va 0x%lx pte 0x%lx\n",
1471 HYPERVISOR_shared_info, pte[pl1_pi(page)]));
1472 }
1473 if (xpmap_ptom_masked(page - KERNBASE) ==
1474 (xen_start_info.console_mfn << PAGE_SHIFT)) {
1475 xencons_interface = (void *)page;
1476 pte[pl1_pi(page)] =
1477 (xen_start_info.console_mfn << PAGE_SHIFT);
1478 __PRINTK(("xencons_interface "
1479 va 0x%lx pte 0x%lx\n",
1480 xencons_interface, pte[pl1_pi(page)]));
1481 }
1482 if (xpmap_ptom_masked(page - KERNBASE) ==
1483 (xen_start_info.store_mfn << PAGE_SHIFT)) {
1484 xenstore_interface = (void *)page;
1485 pte[pl1_pi(page)] =
1486 (xen_start_info.store_mfn << PAGE_SHIFT);
1487 __PRINTK(("xenstore_interface "
1488 "va 0x%lx pte 0x%lx\n",
1489 xenstore_interface, pte[pl1_pi(page)]));
1490 }
1491 #ifdef DOM0OPS
1492 if (page >= (vaddr_t)atdevbase &&
1493 page < (vaddr_t)atdevbase + IOM_SIZE) {
1494 pte[pl1_pi(page)] =
1495 IOM_BEGIN + (page - (vaddr_t)atdevbase);
1496 }
1497 #endif
1498 pte[pl1_pi(page)] |= PG_u | PG_V;
1499 if (page < text_end) {
1500 /* map kernel text RO */
1501 pte[pl1_pi(page)] |= 0;
1502 } else if (page >= old_pgd
1503 && page < old_pgd + (old_count * PAGE_SIZE)) {
1504 /* map old page tables RO */
1505 pte[pl1_pi(page)] |= 0;
1506 } else if (page >= new_pgd &&
1507 page < new_pgd + ((new_count + 3) * PAGE_SIZE)) {
1508 /* map new page tables RO */
1509 pte[pl1_pi(page)] |= 0;
1510 } else {
1511 /* map page RW */
1512 pte[pl1_pi(page)] |= PG_RW;
1513 }
1514 if (page == old_pgd)
1515 __PRINTK(("va 0x%lx pa 0x%lx
1516 "entry 0x%lx -> L1[0x%x]\n",
1517 page, page - KERNBASE,
1518 pte[pl1_pi(page)], pl1_pi(page)));
1519 page += PAGE_SIZE;
1520 }
1521
1522 addr = ((paddr_t) pte) - KERNBASE;
1523 pde[pl2_pi(cur_page)] =
1524 xpmap_ptom_masked(addr) | PG_u | PG_RW | PG_V;
1525 __PRINTK(("L1 va 0x%lx pa 0x%lx entry 0x%lx -> L2[0x%x]\n",
1526 pte, addr, pde[pl2_pi(cur_page)], pl2_pi(cur_page)));
1527 /* Mark readonly */
1528 xen_bt_set_readonly((vaddr_t) pte);
1529 }
1530
1531 /* Install recursive page tables mapping */
1532 bt_pgd[PDIR_SLOT_PTE] =
1533 xpmap_ptom_masked(new_pgd - KERNBASE) | PG_u | PG_V;
1534 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] va 0x%lx pa 0x%lx entry 0x%lx\n",
1535 new_pgd, new_pgd - KERNBASE, bt_pgd[PDIR_SLOT_PTE]));
1536
1537 /* Mark tables RO */
1538 xen_bt_set_readonly((vaddr_t) pde);
1539 xen_bt_set_readonly((vaddr_t) pdtpe);
1540 xen_bt_set_readonly(new_pgd);
1541 /* Pin the PGD */
1542 __PRINTK(("pin PDG\n"));
1543 xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
1544 /* Switch to new tables */
1545 __PRINTK(("switch to PDG\n"));
1546 xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
1547 __PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry 0x%lx\n",
1548 bt_pgd[PDIR_SLOT_PTE]));
1549 __PRINTK(("L4_BASE va 0x%lx\n", (long)L4_BASE));
1550 __PRINTK(("value 0x%lx\n", *L4_BASE));
1551 __PRINTK(("[PDIR_SLOT_PTE] 0x%lx\n", L4_BASE[PDIR_SLOT_PTE]));
1552
1553 /* Now we can safely reclaim space taken by old tables */
1554
1555 __PRINTK(("unpin old PDG\n"));
1556 /* Unpin old PGD */
1557 xpq_queue_unpin_table(xpmap_ptom_masked(old_pgd - KERNBASE));
1558 /* Mark old tables RW */
1559 page = old_pgd;
1560 addr = (paddr_t) pde[pl2_pi(page)] & PG_FRAME;
1561 addr = xpmap_mtop(addr);
1562 pte = (pd_entry_t *) (addr + KERNBASE);
1563 pte += pl1_pi(page);
1564 __PRINTK(("*pde 0x%lx addr 0x%lx pte 0x%lx\n",
1565 pde[pl2_pi(page)], addr, pte));
1566 while (page < old_pgd + (old_count * PAGE_SIZE) && page < map_end) {
1567 addr = xpmap_ptom(((paddr_t) pte) - KERNBASE);
1568 xpq_queue_pte_update((pt_entry_t *) addr, *pte | PG_RW);
1569 page += PAGE_SIZE;
1570 /*
1571 * Our ptes are contiguous
1572 * so it's safe to just "++" here
1573 */
1574 pte++;
1575 }
1576 xpq_flush_queue();
1577 }
1578
1579
1580 void
1581 xen_set_user_pgd(paddr_t page)
1582 {
1583 struct mmuext_op op;
1584 int s = splvm();
1585
1586 xpq_flush_queue();
1587 op.cmd = MMUEXT_NEW_USER_BASEPTR;
1588 op.arg1.mfn = xpmap_phys_to_machine_mapping[page >> PAGE_SHIFT];
1589 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
1590 panic("xen_set_user_pgd: failed to install new user page"
1591 " directory %lx", page);
1592 splx(s);
1593 }
1594
1595 /*
1596 * Bootstrap helper functions
1597 */
1598
1599 /*
1600 * Mark a page readonly
1601 * XXX: assuming vaddr = paddr + KERNBASE
1602 */
1603
1604 static void
1605 xen_bt_set_readonly (vaddr_t page)
1606 {
1607 pt_entry_t entry;
1608
1609 entry = xpmap_ptom_masked(page - KERNBASE);
1610 entry |= PG_u | PG_V;
1611
1612 HYPERVISOR_update_va_mapping (page, entry, UVMF_INVLPG);
1613 }
1614 #endif /* x86_64 */
1615